1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Windows-specific. 10 // A parser for the module-definition file (.def file). 11 // 12 // The format of module-definition files are described in this document: 13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Object/COFFModuleDefinition.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Object/COFFImportFile.h" 21 #include "llvm/Object/Error.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/Path.h" 24 25 using namespace llvm::COFF; 26 using namespace llvm; 27 28 namespace llvm { 29 namespace object { 30 31 enum Kind { 32 Unknown, 33 Eof, 34 Identifier, 35 Comma, 36 Equal, 37 EqualEqual, 38 KwBase, 39 KwConstant, 40 KwData, 41 KwExports, 42 KwHeapsize, 43 KwLibrary, 44 KwName, 45 KwNoname, 46 KwPrivate, 47 KwStacksize, 48 KwVersion, 49 }; 50 51 struct Token { 52 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} 53 Kind K; 54 StringRef Value; 55 }; 56 57 static bool isDecorated(StringRef Sym, bool MingwDef) { 58 // In def files, the symbols can either be listed decorated or undecorated. 59 // 60 // - For cdecl symbols, only the undecorated form is allowed. 61 // - For fastcall and vectorcall symbols, both fully decorated or 62 // undecorated forms can be present. 63 // - For stdcall symbols in non-MinGW environments, the decorated form is 64 // fully decorated with leading underscore and trailing stack argument 65 // size - like "_Func@0". 66 // - In MinGW def files, a decorated stdcall symbol does not include the 67 // leading underscore though, like "Func@0". 68 69 // This function controls whether a leading underscore should be added to 70 // the given symbol name or not. For MinGW, treat a stdcall symbol name such 71 // as "Func@0" as undecorated, i.e. a leading underscore must be added. 72 // For non-MinGW, look for '@' in the whole string and consider "_Func@0" 73 // as decorated, i.e. don't add any more leading underscores. 74 // We can't check for a leading underscore here, since function names 75 // themselves can start with an underscore, while a second one still needs 76 // to be added. 77 return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") || 78 (!MingwDef && Sym.contains('@')); 79 } 80 81 class Lexer { 82 public: 83 Lexer(StringRef S) : Buf(S) {} 84 85 Token lex() { 86 Buf = Buf.trim(); 87 if (Buf.empty()) 88 return Token(Eof); 89 90 switch (Buf[0]) { 91 case '\0': 92 return Token(Eof); 93 case ';': { 94 size_t End = Buf.find('\n'); 95 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 96 return lex(); 97 } 98 case '=': 99 Buf = Buf.drop_front(); 100 if (Buf.consume_front("=")) 101 return Token(EqualEqual, "=="); 102 return Token(Equal, "="); 103 case ',': 104 Buf = Buf.drop_front(); 105 return Token(Comma, ","); 106 case '"': { 107 StringRef S; 108 std::tie(S, Buf) = Buf.substr(1).split('"'); 109 return Token(Identifier, S); 110 } 111 default: { 112 size_t End = Buf.find_first_of("=,;\r\n \t\v"); 113 StringRef Word = Buf.substr(0, End); 114 Kind K = llvm::StringSwitch<Kind>(Word) 115 .Case("BASE", KwBase) 116 .Case("CONSTANT", KwConstant) 117 .Case("DATA", KwData) 118 .Case("EXPORTS", KwExports) 119 .Case("HEAPSIZE", KwHeapsize) 120 .Case("LIBRARY", KwLibrary) 121 .Case("NAME", KwName) 122 .Case("NONAME", KwNoname) 123 .Case("PRIVATE", KwPrivate) 124 .Case("STACKSIZE", KwStacksize) 125 .Case("VERSION", KwVersion) 126 .Default(Identifier); 127 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 128 return Token(K, Word); 129 } 130 } 131 } 132 133 private: 134 StringRef Buf; 135 }; 136 137 class Parser { 138 public: 139 explicit Parser(StringRef S, MachineTypes M, bool B, bool AU) 140 : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) { 141 if (Machine != IMAGE_FILE_MACHINE_I386) 142 AddUnderscores = false; 143 } 144 145 Expected<COFFModuleDefinition> parse() { 146 do { 147 if (Error Err = parseOne()) 148 return std::move(Err); 149 } while (Tok.K != Eof); 150 return Info; 151 } 152 153 private: 154 void read() { 155 if (Stack.empty()) { 156 Tok = Lex.lex(); 157 return; 158 } 159 Tok = Stack.back(); 160 Stack.pop_back(); 161 } 162 163 Error readAsInt(uint64_t *I) { 164 read(); 165 if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) 166 return createError("integer expected"); 167 return Error::success(); 168 } 169 170 Error expect(Kind Expected, StringRef Msg) { 171 read(); 172 if (Tok.K != Expected) 173 return createError(Msg); 174 return Error::success(); 175 } 176 177 void unget() { Stack.push_back(Tok); } 178 179 Error parseOne() { 180 read(); 181 switch (Tok.K) { 182 case Eof: 183 return Error::success(); 184 case KwExports: 185 for (;;) { 186 read(); 187 if (Tok.K != Identifier) { 188 unget(); 189 return Error::success(); 190 } 191 if (Error Err = parseExport()) 192 return Err; 193 } 194 case KwHeapsize: 195 return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); 196 case KwStacksize: 197 return parseNumbers(&Info.StackReserve, &Info.StackCommit); 198 case KwLibrary: 199 case KwName: { 200 bool IsDll = Tok.K == KwLibrary; // Check before parseName. 201 std::string Name; 202 if (Error Err = parseName(&Name, &Info.ImageBase)) 203 return Err; 204 205 Info.ImportName = Name; 206 207 // Set the output file, but don't override /out if it was already passed. 208 if (Info.OutputFile.empty()) { 209 Info.OutputFile = Name; 210 // Append the appropriate file extension if not already present. 211 if (!sys::path::has_extension(Name)) 212 Info.OutputFile += IsDll ? ".dll" : ".exe"; 213 } 214 215 return Error::success(); 216 } 217 case KwVersion: 218 return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); 219 default: 220 return createError("unknown directive: " + Tok.Value); 221 } 222 } 223 224 Error parseExport() { 225 COFFShortExport E; 226 E.Name = std::string(Tok.Value); 227 read(); 228 if (Tok.K == Equal) { 229 read(); 230 if (Tok.K != Identifier) 231 return createError("identifier expected, but got " + Tok.Value); 232 E.ExtName = E.Name; 233 E.Name = std::string(Tok.Value); 234 } else { 235 unget(); 236 } 237 238 if (AddUnderscores) { 239 if (!isDecorated(E.Name, MingwDef)) 240 E.Name = (std::string("_").append(E.Name)); 241 if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef)) 242 E.ExtName = (std::string("_").append(E.ExtName)); 243 } 244 245 for (;;) { 246 read(); 247 if (Tok.K == Identifier && Tok.Value[0] == '@') { 248 if (Tok.Value == "@") { 249 // "foo @ 10" 250 read(); 251 Tok.Value.getAsInteger(10, E.Ordinal); 252 } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) { 253 // "foo \n @bar" - Not an ordinal modifier at all, but the next 254 // export (fastcall decorated) - complete the current one. 255 unget(); 256 Info.Exports.push_back(E); 257 return Error::success(); 258 } 259 // "foo @10" 260 read(); 261 if (Tok.K == KwNoname) { 262 E.Noname = true; 263 } else { 264 unget(); 265 } 266 continue; 267 } 268 if (Tok.K == KwData) { 269 E.Data = true; 270 continue; 271 } 272 if (Tok.K == KwConstant) { 273 E.Constant = true; 274 continue; 275 } 276 if (Tok.K == KwPrivate) { 277 E.Private = true; 278 continue; 279 } 280 if (Tok.K == EqualEqual) { 281 read(); 282 E.AliasTarget = std::string(Tok.Value); 283 if (AddUnderscores && !isDecorated(E.AliasTarget, MingwDef)) 284 E.AliasTarget = std::string("_").append(E.AliasTarget); 285 continue; 286 } 287 unget(); 288 Info.Exports.push_back(E); 289 return Error::success(); 290 } 291 } 292 293 // HEAPSIZE/STACKSIZE reserve[,commit] 294 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { 295 if (Error Err = readAsInt(Reserve)) 296 return Err; 297 read(); 298 if (Tok.K != Comma) { 299 unget(); 300 Commit = nullptr; 301 return Error::success(); 302 } 303 if (Error Err = readAsInt(Commit)) 304 return Err; 305 return Error::success(); 306 } 307 308 // NAME outputPath [BASE=address] 309 Error parseName(std::string *Out, uint64_t *Baseaddr) { 310 read(); 311 if (Tok.K == Identifier) { 312 *Out = std::string(Tok.Value); 313 } else { 314 *Out = ""; 315 unget(); 316 return Error::success(); 317 } 318 read(); 319 if (Tok.K == KwBase) { 320 if (Error Err = expect(Equal, "'=' expected")) 321 return Err; 322 if (Error Err = readAsInt(Baseaddr)) 323 return Err; 324 } else { 325 unget(); 326 *Baseaddr = 0; 327 } 328 return Error::success(); 329 } 330 331 // VERSION major[.minor] 332 Error parseVersion(uint32_t *Major, uint32_t *Minor) { 333 read(); 334 if (Tok.K != Identifier) 335 return createError("identifier expected, but got " + Tok.Value); 336 StringRef V1, V2; 337 std::tie(V1, V2) = Tok.Value.split('.'); 338 if (V1.getAsInteger(10, *Major)) 339 return createError("integer expected, but got " + Tok.Value); 340 if (V2.empty()) 341 *Minor = 0; 342 else if (V2.getAsInteger(10, *Minor)) 343 return createError("integer expected, but got " + Tok.Value); 344 return Error::success(); 345 } 346 347 Lexer Lex; 348 Token Tok; 349 std::vector<Token> Stack; 350 MachineTypes Machine; 351 COFFModuleDefinition Info; 352 bool MingwDef; 353 bool AddUnderscores; 354 }; 355 356 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB, 357 MachineTypes Machine, 358 bool MingwDef, 359 bool AddUnderscores) { 360 return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse(); 361 } 362 363 } // namespace object 364 } // namespace llvm 365