1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Windows-specific. 11 // A parser for the module-definition file (.def file). 12 // 13 // The format of module-definition files are described in this document: 14 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Object/COFFModuleDefinition.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/ADT/StringSwitch.h" 21 #include "llvm/Object/COFF.h" 22 #include "llvm/Object/COFFImportFile.h" 23 #include "llvm/Object/Error.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/Path.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 using namespace llvm::COFF; 29 using namespace llvm; 30 31 namespace llvm { 32 namespace object { 33 34 enum Kind { 35 Unknown, 36 Eof, 37 Identifier, 38 Comma, 39 Equal, 40 KwBase, 41 KwConstant, 42 KwData, 43 KwExports, 44 KwHeapsize, 45 KwLibrary, 46 KwName, 47 KwNoname, 48 KwPrivate, 49 KwStacksize, 50 KwVersion, 51 }; 52 53 struct Token { 54 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} 55 Kind K; 56 StringRef Value; 57 }; 58 59 static bool isDecorated(StringRef Sym, bool MingwDef) { 60 // In def files, the symbols can either be listed decorated or undecorated. 61 // 62 // - For cdecl symbols, only the undecorated form is allowed. 63 // - For fastcall and vectorcall symbols, both fully decorated or 64 // undecorated forms can be present. 65 // - For stdcall symbols in non-MinGW environments, the decorated form is 66 // fully decorated with leading underscore and trailing stack argument 67 // size - like "_Func@0". 68 // - In MinGW def files, a decorated stdcall symbol does not include the 69 // leading underscore though, like "Func@0". 70 71 // This function controls whether a leading underscore should be added to 72 // the given symbol name or not. For MinGW, treat a stdcall symbol name such 73 // as "Func@0" as undecorated, i.e. a leading underscore must be added. 74 // For non-MinGW, look for '@' in the whole string and consider "_Func@0" 75 // as decorated, i.e. don't add any more leading underscores. 76 // We can't check for a leading underscore here, since function names 77 // themselves can start with an underscore, while a second one still needs 78 // to be added. 79 return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") || 80 (!MingwDef && Sym.contains('@')); 81 } 82 83 static Error createError(const Twine &Err) { 84 return make_error<StringError>(StringRef(Err.str()), 85 object_error::parse_failed); 86 } 87 88 class Lexer { 89 public: 90 Lexer(StringRef S) : Buf(S) {} 91 92 Token lex() { 93 Buf = Buf.trim(); 94 if (Buf.empty()) 95 return Token(Eof); 96 97 switch (Buf[0]) { 98 case '\0': 99 return Token(Eof); 100 case ';': { 101 size_t End = Buf.find('\n'); 102 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 103 return lex(); 104 } 105 case '=': 106 Buf = Buf.drop_front(); 107 // GNU dlltool accepts both = and ==. 108 if (Buf.startswith("=")) 109 Buf = Buf.drop_front(); 110 return Token(Equal, "="); 111 case ',': 112 Buf = Buf.drop_front(); 113 return Token(Comma, ","); 114 case '"': { 115 StringRef S; 116 std::tie(S, Buf) = Buf.substr(1).split('"'); 117 return Token(Identifier, S); 118 } 119 default: { 120 size_t End = Buf.find_first_of("=,\r\n \t\v"); 121 StringRef Word = Buf.substr(0, End); 122 Kind K = llvm::StringSwitch<Kind>(Word) 123 .Case("BASE", KwBase) 124 .Case("CONSTANT", KwConstant) 125 .Case("DATA", KwData) 126 .Case("EXPORTS", KwExports) 127 .Case("HEAPSIZE", KwHeapsize) 128 .Case("LIBRARY", KwLibrary) 129 .Case("NAME", KwName) 130 .Case("NONAME", KwNoname) 131 .Case("PRIVATE", KwPrivate) 132 .Case("STACKSIZE", KwStacksize) 133 .Case("VERSION", KwVersion) 134 .Default(Identifier); 135 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 136 return Token(K, Word); 137 } 138 } 139 } 140 141 private: 142 StringRef Buf; 143 }; 144 145 class Parser { 146 public: 147 explicit Parser(StringRef S, MachineTypes M, bool B) 148 : Lex(S), Machine(M), MingwDef(B) {} 149 150 Expected<COFFModuleDefinition> parse() { 151 do { 152 if (Error Err = parseOne()) 153 return std::move(Err); 154 } while (Tok.K != Eof); 155 return Info; 156 } 157 158 private: 159 void read() { 160 if (Stack.empty()) { 161 Tok = Lex.lex(); 162 return; 163 } 164 Tok = Stack.back(); 165 Stack.pop_back(); 166 } 167 168 Error readAsInt(uint64_t *I) { 169 read(); 170 if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) 171 return createError("integer expected"); 172 return Error::success(); 173 } 174 175 Error expect(Kind Expected, StringRef Msg) { 176 read(); 177 if (Tok.K != Expected) 178 return createError(Msg); 179 return Error::success(); 180 } 181 182 void unget() { Stack.push_back(Tok); } 183 184 Error parseOne() { 185 read(); 186 switch (Tok.K) { 187 case Eof: 188 return Error::success(); 189 case KwExports: 190 for (;;) { 191 read(); 192 if (Tok.K != Identifier) { 193 unget(); 194 return Error::success(); 195 } 196 if (Error Err = parseExport()) 197 return Err; 198 } 199 case KwHeapsize: 200 return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); 201 case KwStacksize: 202 return parseNumbers(&Info.StackReserve, &Info.StackCommit); 203 case KwLibrary: 204 case KwName: { 205 bool IsDll = Tok.K == KwLibrary; // Check before parseName. 206 std::string Name; 207 if (Error Err = parseName(&Name, &Info.ImageBase)) 208 return Err; 209 210 Info.ImportName = Name; 211 212 // Set the output file, but don't override /out if it was already passed. 213 if (Info.OutputFile.empty()) { 214 Info.OutputFile = Name; 215 // Append the appropriate file extension if not already present. 216 if (!sys::path::has_extension(Name)) 217 Info.OutputFile += IsDll ? ".dll" : ".exe"; 218 } 219 220 return Error::success(); 221 } 222 case KwVersion: 223 return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); 224 default: 225 return createError("unknown directive: " + Tok.Value); 226 } 227 } 228 229 Error parseExport() { 230 COFFShortExport E; 231 E.Name = Tok.Value; 232 read(); 233 if (Tok.K == Equal) { 234 read(); 235 if (Tok.K != Identifier) 236 return createError("identifier expected, but got " + Tok.Value); 237 E.ExtName = E.Name; 238 E.Name = Tok.Value; 239 } else { 240 unget(); 241 } 242 243 if (Machine == IMAGE_FILE_MACHINE_I386) { 244 if (!isDecorated(E.Name, MingwDef)) 245 E.Name = (std::string("_").append(E.Name)); 246 if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef)) 247 E.ExtName = (std::string("_").append(E.ExtName)); 248 } 249 250 for (;;) { 251 read(); 252 if (Tok.K == Identifier && Tok.Value[0] == '@') { 253 if (Tok.Value == "@") { 254 // "foo @ 10" 255 read(); 256 Tok.Value.getAsInteger(10, E.Ordinal); 257 } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) { 258 // "foo \n @bar" - Not an ordinal modifier at all, but the next 259 // export (fastcall decorated) - complete the current one. 260 unget(); 261 Info.Exports.push_back(E); 262 return Error::success(); 263 } 264 // "foo @10" 265 read(); 266 if (Tok.K == KwNoname) { 267 E.Noname = true; 268 } else { 269 unget(); 270 } 271 continue; 272 } 273 if (Tok.K == KwData) { 274 E.Data = true; 275 continue; 276 } 277 if (Tok.K == KwConstant) { 278 E.Constant = true; 279 continue; 280 } 281 if (Tok.K == KwPrivate) { 282 E.Private = true; 283 continue; 284 } 285 unget(); 286 Info.Exports.push_back(E); 287 return Error::success(); 288 } 289 } 290 291 // HEAPSIZE/STACKSIZE reserve[,commit] 292 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { 293 if (Error Err = readAsInt(Reserve)) 294 return Err; 295 read(); 296 if (Tok.K != Comma) { 297 unget(); 298 Commit = nullptr; 299 return Error::success(); 300 } 301 if (Error Err = readAsInt(Commit)) 302 return Err; 303 return Error::success(); 304 } 305 306 // NAME outputPath [BASE=address] 307 Error parseName(std::string *Out, uint64_t *Baseaddr) { 308 read(); 309 if (Tok.K == Identifier) { 310 *Out = Tok.Value; 311 } else { 312 *Out = ""; 313 unget(); 314 return Error::success(); 315 } 316 read(); 317 if (Tok.K == KwBase) { 318 if (Error Err = expect(Equal, "'=' expected")) 319 return Err; 320 if (Error Err = readAsInt(Baseaddr)) 321 return Err; 322 } else { 323 unget(); 324 *Baseaddr = 0; 325 } 326 return Error::success(); 327 } 328 329 // VERSION major[.minor] 330 Error parseVersion(uint32_t *Major, uint32_t *Minor) { 331 read(); 332 if (Tok.K != Identifier) 333 return createError("identifier expected, but got " + Tok.Value); 334 StringRef V1, V2; 335 std::tie(V1, V2) = Tok.Value.split('.'); 336 if (V1.getAsInteger(10, *Major)) 337 return createError("integer expected, but got " + Tok.Value); 338 if (V2.empty()) 339 *Minor = 0; 340 else if (V2.getAsInteger(10, *Minor)) 341 return createError("integer expected, but got " + Tok.Value); 342 return Error::success(); 343 } 344 345 Lexer Lex; 346 Token Tok; 347 std::vector<Token> Stack; 348 MachineTypes Machine; 349 COFFModuleDefinition Info; 350 bool MingwDef; 351 }; 352 353 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB, 354 MachineTypes Machine, 355 bool MingwDef) { 356 return Parser(MB.getBuffer(), Machine, MingwDef).parse(); 357 } 358 359 } // namespace object 360 } // namespace llvm 361