xref: /llvm-project/llvm/lib/Object/COFFModuleDefinition.cpp (revision 90e9c6e36e8b928240dfd61c2dfd30cf26108c07)
1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Windows-specific.
10 // A parser for the module-definition file (.def file).
11 //
12 // The format of module-definition files is described in this document:
13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Object/COFFModuleDefinition.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Object/COFFImportFile.h"
21 #include "llvm/Object/Error.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/Path.h"
24 
25 using namespace llvm::COFF;
26 using namespace llvm;
27 
28 namespace llvm {
29 namespace object {
30 
// Kinds of token produced by the Lexer below.
enum Kind {
  // Kind of a default-constructed Token.
  Unknown,
  // End of input: the buffer is exhausted or a NUL character was reached.
  Eof,
  // A quoted string, or any bare word that is not one of the keywords below.
  Identifier,
  // Punctuation: ",", "=" and "==".
  Comma,
  Equal,
  EqualEqual,
  // Keywords. The lexer recognizes each by its upper-case spelling
  // (e.g. "BASE", "EXPORTS", "NONAME").
  KwBase,
  KwConstant,
  KwData,
  KwExports,
  KwHeapsize,
  KwLibrary,
  KwName,
  KwNoname,
  KwPrivate,
  KwStacksize,
  KwVersion,
};
50 
51 struct Token {
52   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
53   Kind K;
54   StringRef Value;
55 };
56 
57 static bool isDecorated(StringRef Sym, bool MingwDef) {
58   // In def files, the symbols can either be listed decorated or undecorated.
59   //
60   // - For cdecl symbols, only the undecorated form is allowed.
61   // - For fastcall and vectorcall symbols, both fully decorated or
62   //   undecorated forms can be present.
63   // - For stdcall symbols in non-MinGW environments, the decorated form is
64   //   fully decorated with leading underscore and trailing stack argument
65   //   size - like "_Func@0".
66   // - In MinGW def files, a decorated stdcall symbol does not include the
67   //   leading underscore though, like "Func@0".
68 
69   // This function controls whether a leading underscore should be added to
70   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
71   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
72   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
73   // as decorated, i.e. don't add any more leading underscores.
74   // We can't check for a leading underscore here, since function names
75   // themselves can start with an underscore, while a second one still needs
76   // to be added.
77   return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") ||
78          (!MingwDef && Sym.contains('@'));
79 }
80 
81 class Lexer {
82 public:
83   Lexer(StringRef S) : Buf(S) {}
84 
85   Token lex() {
86     Buf = Buf.trim();
87     if (Buf.empty())
88       return Token(Eof);
89 
90     switch (Buf[0]) {
91     case '\0':
92       return Token(Eof);
93     case ';': {
94       size_t End = Buf.find('\n');
95       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
96       return lex();
97     }
98     case '=':
99       Buf = Buf.drop_front();
100       if (Buf.consume_front("="))
101         return Token(EqualEqual, "==");
102       return Token(Equal, "=");
103     case ',':
104       Buf = Buf.drop_front();
105       return Token(Comma, ",");
106     case '"': {
107       StringRef S;
108       std::tie(S, Buf) = Buf.substr(1).split('"');
109       return Token(Identifier, S);
110     }
111     default: {
112       size_t End = Buf.find_first_of("=,;\r\n \t\v");
113       StringRef Word = Buf.substr(0, End);
114       Kind K = llvm::StringSwitch<Kind>(Word)
115                    .Case("BASE", KwBase)
116                    .Case("CONSTANT", KwConstant)
117                    .Case("DATA", KwData)
118                    .Case("EXPORTS", KwExports)
119                    .Case("HEAPSIZE", KwHeapsize)
120                    .Case("LIBRARY", KwLibrary)
121                    .Case("NAME", KwName)
122                    .Case("NONAME", KwNoname)
123                    .Case("PRIVATE", KwPrivate)
124                    .Case("STACKSIZE", KwStacksize)
125                    .Case("VERSION", KwVersion)
126                    .Default(Identifier);
127       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
128       return Token(K, Word);
129     }
130     }
131   }
132 
133 private:
134   StringRef Buf;
135 };
136 
137 class Parser {
138 public:
139   explicit Parser(StringRef S, MachineTypes M, bool B, bool AU)
140       : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) {
141     if (Machine != IMAGE_FILE_MACHINE_I386)
142       AddUnderscores = false;
143   }
144 
145   Expected<COFFModuleDefinition> parse() {
146     do {
147       if (Error Err = parseOne())
148         return std::move(Err);
149     } while (Tok.K != Eof);
150     return Info;
151   }
152 
153 private:
154   void read() {
155     if (Stack.empty()) {
156       Tok = Lex.lex();
157       return;
158     }
159     Tok = Stack.back();
160     Stack.pop_back();
161   }
162 
163   Error readAsInt(uint64_t *I) {
164     read();
165     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
166       return createError("integer expected");
167     return Error::success();
168   }
169 
170   Error expect(Kind Expected, StringRef Msg) {
171     read();
172     if (Tok.K != Expected)
173       return createError(Msg);
174     return Error::success();
175   }
176 
177   void unget() { Stack.push_back(Tok); }
178 
179   Error parseOne() {
180     read();
181     switch (Tok.K) {
182     case Eof:
183       return Error::success();
184     case KwExports:
185       for (;;) {
186         read();
187         if (Tok.K != Identifier) {
188           unget();
189           return Error::success();
190         }
191         if (Error Err = parseExport())
192           return Err;
193       }
194     case KwHeapsize:
195       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
196     case KwStacksize:
197       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
198     case KwLibrary:
199     case KwName: {
200       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
201       std::string Name;
202       if (Error Err = parseName(&Name, &Info.ImageBase))
203         return Err;
204 
205       Info.ImportName = Name;
206 
207       // Set the output file, but don't override /out if it was already passed.
208       if (Info.OutputFile.empty()) {
209         Info.OutputFile = Name;
210         // Append the appropriate file extension if not already present.
211         if (!sys::path::has_extension(Name))
212           Info.OutputFile += IsDll ? ".dll" : ".exe";
213       }
214 
215       return Error::success();
216     }
217     case KwVersion:
218       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
219     default:
220       return createError("unknown directive: " + Tok.Value);
221     }
222   }
223 
224   Error parseExport() {
225     COFFShortExport E;
226     E.Name = std::string(Tok.Value);
227     read();
228     if (Tok.K == Equal) {
229       read();
230       if (Tok.K != Identifier)
231         return createError("identifier expected, but got " + Tok.Value);
232       E.ExtName = E.Name;
233       E.Name = std::string(Tok.Value);
234     } else {
235       unget();
236     }
237 
238     if (AddUnderscores) {
239       if (!isDecorated(E.Name, MingwDef))
240         E.Name = (std::string("_").append(E.Name));
241       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
242         E.ExtName = (std::string("_").append(E.ExtName));
243     }
244 
245     for (;;) {
246       read();
247       if (Tok.K == Identifier && Tok.Value[0] == '@') {
248         if (Tok.Value == "@") {
249           // "foo @ 10"
250           read();
251           Tok.Value.getAsInteger(10, E.Ordinal);
252         } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
253           // "foo \n @bar" - Not an ordinal modifier at all, but the next
254           // export (fastcall decorated) - complete the current one.
255           unget();
256           Info.Exports.push_back(E);
257           return Error::success();
258         }
259         // "foo @10"
260         read();
261         if (Tok.K == KwNoname) {
262           E.Noname = true;
263         } else {
264           unget();
265         }
266         continue;
267       }
268       if (Tok.K == KwData) {
269         E.Data = true;
270         continue;
271       }
272       if (Tok.K == KwConstant) {
273         E.Constant = true;
274         continue;
275       }
276       if (Tok.K == KwPrivate) {
277         E.Private = true;
278         continue;
279       }
280       if (Tok.K == EqualEqual) {
281         read();
282         E.AliasTarget = std::string(Tok.Value);
283         if (AddUnderscores && !isDecorated(E.AliasTarget, MingwDef))
284           E.AliasTarget = std::string("_").append(E.AliasTarget);
285         continue;
286       }
287       unget();
288       Info.Exports.push_back(E);
289       return Error::success();
290     }
291   }
292 
293   // HEAPSIZE/STACKSIZE reserve[,commit]
294   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
295     if (Error Err = readAsInt(Reserve))
296       return Err;
297     read();
298     if (Tok.K != Comma) {
299       unget();
300       Commit = nullptr;
301       return Error::success();
302     }
303     if (Error Err = readAsInt(Commit))
304       return Err;
305     return Error::success();
306   }
307 
308   // NAME outputPath [BASE=address]
309   Error parseName(std::string *Out, uint64_t *Baseaddr) {
310     read();
311     if (Tok.K == Identifier) {
312       *Out = std::string(Tok.Value);
313     } else {
314       *Out = "";
315       unget();
316       return Error::success();
317     }
318     read();
319     if (Tok.K == KwBase) {
320       if (Error Err = expect(Equal, "'=' expected"))
321         return Err;
322       if (Error Err = readAsInt(Baseaddr))
323         return Err;
324     } else {
325       unget();
326       *Baseaddr = 0;
327     }
328     return Error::success();
329   }
330 
331   // VERSION major[.minor]
332   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
333     read();
334     if (Tok.K != Identifier)
335       return createError("identifier expected, but got " + Tok.Value);
336     StringRef V1, V2;
337     std::tie(V1, V2) = Tok.Value.split('.');
338     if (V1.getAsInteger(10, *Major))
339       return createError("integer expected, but got " + Tok.Value);
340     if (V2.empty())
341       *Minor = 0;
342     else if (V2.getAsInteger(10, *Minor))
343       return createError("integer expected, but got " + Tok.Value);
344     return Error::success();
345   }
346 
347   Lexer Lex;
348   Token Tok;
349   std::vector<Token> Stack;
350   MachineTypes Machine;
351   COFFModuleDefinition Info;
352   bool MingwDef;
353   bool AddUnderscores;
354 };
355 
356 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
357                                                          MachineTypes Machine,
358                                                          bool MingwDef,
359                                                          bool AddUnderscores) {
360   return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse();
361 }
362 
363 } // namespace object
364 } // namespace llvm
365