1 #include "../include/KaleidoscopeJIT.h" 2 #include "llvm/ADT/APFloat.h" 3 #include "llvm/ADT/STLExtras.h" 4 #include "llvm/IR/BasicBlock.h" 5 #include "llvm/IR/Constants.h" 6 #include "llvm/IR/DerivedTypes.h" 7 #include "llvm/IR/Function.h" 8 #include "llvm/IR/IRBuilder.h" 9 #include "llvm/IR/Instructions.h" 10 #include "llvm/IR/LLVMContext.h" 11 #include "llvm/IR/LegacyPassManager.h" 12 #include "llvm/IR/Module.h" 13 #include "llvm/IR/Type.h" 14 #include "llvm/IR/Verifier.h" 15 #include "llvm/Support/TargetSelect.h" 16 #include "llvm/Target/TargetMachine.h" 17 #include "llvm/Transforms/InstCombine/InstCombine.h" 18 #include "llvm/Transforms/Scalar.h" 19 #include "llvm/Transforms/Scalar/GVN.h" 20 #include "llvm/Transforms/Utils.h" 21 #include <algorithm> 22 #include <cassert> 23 #include <cctype> 24 #include <cstdint> 25 #include <cstdio> 26 #include <cstdlib> 27 #include <map> 28 #include <memory> 29 #include <string> 30 #include <utility> 31 #include <vector> 32 33 using namespace llvm; 34 using namespace llvm::orc; 35 36 //===----------------------------------------------------------------------===// 37 // Lexer 38 //===----------------------------------------------------------------------===// 39 40 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 41 // of these for known things. 42 enum Token { 43 tok_eof = -1, 44 45 // commands 46 tok_def = -2, 47 tok_extern = -3, 48 49 // primary 50 tok_identifier = -4, 51 tok_number = -5, 52 53 // control 54 tok_if = -6, 55 tok_then = -7, 56 tok_else = -8, 57 tok_for = -9, 58 tok_in = -10, 59 60 // operators 61 tok_binary = -11, 62 tok_unary = -12, 63 64 // var definition 65 tok_var = -13 66 }; 67 68 static std::string IdentifierStr; // Filled in if tok_identifier 69 static double NumVal; // Filled in if tok_number 70 71 /// gettok - Return the next token from standard input. 72 static int gettok() { 73 static int LastChar = ' '; 74 75 // Skip any whitespace. 76 while (isspace(LastChar)) 77 LastChar = getchar(); 78 79 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 80 IdentifierStr = LastChar; 81 while (isalnum((LastChar = getchar()))) 82 IdentifierStr += LastChar; 83 84 if (IdentifierStr == "def") 85 return tok_def; 86 if (IdentifierStr == "extern") 87 return tok_extern; 88 if (IdentifierStr == "if") 89 return tok_if; 90 if (IdentifierStr == "then") 91 return tok_then; 92 if (IdentifierStr == "else") 93 return tok_else; 94 if (IdentifierStr == "for") 95 return tok_for; 96 if (IdentifierStr == "in") 97 return tok_in; 98 if (IdentifierStr == "binary") 99 return tok_binary; 100 if (IdentifierStr == "unary") 101 return tok_unary; 102 if (IdentifierStr == "var") 103 return tok_var; 104 return tok_identifier; 105 } 106 107 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 108 std::string NumStr; 109 do { 110 NumStr += LastChar; 111 LastChar = getchar(); 112 } while (isdigit(LastChar) || LastChar == '.'); 113 114 NumVal = strtod(NumStr.c_str(), nullptr); 115 return tok_number; 116 } 117 118 if (LastChar == '#') { 119 // Comment until end of line. 120 do 121 LastChar = getchar(); 122 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 123 124 if (LastChar != EOF) 125 return gettok(); 126 } 127 128 // Check for end of file. Don't eat the EOF. 129 if (LastChar == EOF) 130 return tok_eof; 131 132 // Otherwise, just return the character as its ascii value. 133 int ThisChar = LastChar; 134 LastChar = getchar(); 135 return ThisChar; 136 } 137 138 //===----------------------------------------------------------------------===// 139 // Abstract Syntax Tree (aka Parse Tree) 140 //===----------------------------------------------------------------------===// 141 142 namespace { 143 144 /// ExprAST - Base class for all expression nodes. 145 class ExprAST { 146 public: 147 virtual ~ExprAST() = default; 148 149 virtual Value *codegen() = 0; 150 }; 151 152 /// NumberExprAST - Expression class for numeric literals like "1.0". 153 class NumberExprAST : public ExprAST { 154 double Val; 155 156 public: 157 NumberExprAST(double Val) : Val(Val) {} 158 159 Value *codegen() override; 160 }; 161 162 /// VariableExprAST - Expression class for referencing a variable, like "a". 163 class VariableExprAST : public ExprAST { 164 std::string Name; 165 166 public: 167 VariableExprAST(const std::string &Name) : Name(Name) {} 168 169 Value *codegen() override; 170 const std::string &getName() const { return Name; } 171 }; 172 173 /// UnaryExprAST - Expression class for a unary operator. 174 class UnaryExprAST : public ExprAST { 175 char Opcode; 176 std::unique_ptr<ExprAST> Operand; 177 178 public: 179 UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) 180 : Opcode(Opcode), Operand(std::move(Operand)) {} 181 182 Value *codegen() override; 183 }; 184 185 /// BinaryExprAST - Expression class for a binary operator. 186 class BinaryExprAST : public ExprAST { 187 char Op; 188 std::unique_ptr<ExprAST> LHS, RHS; 189 190 public: 191 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 192 std::unique_ptr<ExprAST> RHS) 193 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 194 195 Value *codegen() override; 196 }; 197 198 /// CallExprAST - Expression class for function calls. 199 class CallExprAST : public ExprAST { 200 std::string Callee; 201 std::vector<std::unique_ptr<ExprAST>> Args; 202 203 public: 204 CallExprAST(const std::string &Callee, 205 std::vector<std::unique_ptr<ExprAST>> Args) 206 : Callee(Callee), Args(std::move(Args)) {} 207 208 Value *codegen() override; 209 }; 210 211 /// IfExprAST - Expression class for if/then/else. 212 class IfExprAST : public ExprAST { 213 std::unique_ptr<ExprAST> Cond, Then, Else; 214 215 public: 216 IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, 217 std::unique_ptr<ExprAST> Else) 218 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} 219 220 Value *codegen() override; 221 }; 222 223 /// ForExprAST - Expression class for for/in. 224 class ForExprAST : public ExprAST { 225 std::string VarName; 226 std::unique_ptr<ExprAST> Start, End, Step, Body; 227 228 public: 229 ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, 230 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, 231 std::unique_ptr<ExprAST> Body) 232 : VarName(VarName), Start(std::move(Start)), End(std::move(End)), 233 Step(std::move(Step)), Body(std::move(Body)) {} 234 235 Value *codegen() override; 236 }; 237 238 /// VarExprAST - Expression class for var/in 239 class VarExprAST : public ExprAST { 240 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 241 std::unique_ptr<ExprAST> Body; 242 243 public: 244 VarExprAST( 245 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, 246 std::unique_ptr<ExprAST> Body) 247 : VarNames(std::move(VarNames)), Body(std::move(Body)) {} 248 249 Value *codegen() override; 250 }; 251 252 /// PrototypeAST - This class represents the "prototype" for a function, 253 /// which captures its name, and its argument names (thus implicitly the number 254 /// of arguments the function takes), as well as if it is an operator. 255 class PrototypeAST { 256 std::string Name; 257 std::vector<std::string> Args; 258 bool IsOperator; 259 unsigned Precedence; // Precedence if a binary op. 260 261 public: 262 PrototypeAST(const std::string &Name, std::vector<std::string> Args, 263 bool IsOperator = false, unsigned Prec = 0) 264 : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), 265 Precedence(Prec) {} 266 267 Function *codegen(); 268 const std::string &getName() const { return Name; } 269 270 bool isUnaryOp() const { return IsOperator && Args.size() == 1; } 271 bool isBinaryOp() const { return IsOperator && Args.size() == 2; } 272 273 char getOperatorName() const { 274 assert(isUnaryOp() || isBinaryOp()); 275 return Name[Name.size() - 1]; 276 } 277 278 unsigned getBinaryPrecedence() const { return Precedence; } 279 }; 280 281 /// FunctionAST - This class represents a function definition itself. 282 class FunctionAST { 283 std::unique_ptr<PrototypeAST> Proto; 284 std::unique_ptr<ExprAST> Body; 285 286 public: 287 FunctionAST(std::unique_ptr<PrototypeAST> Proto, 288 std::unique_ptr<ExprAST> Body) 289 : Proto(std::move(Proto)), Body(std::move(Body)) {} 290 291 Function *codegen(); 292 }; 293 294 } // end anonymous namespace 295 296 //===----------------------------------------------------------------------===// 297 // Parser 298 //===----------------------------------------------------------------------===// 299 300 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 301 /// token the parser is looking at. getNextToken reads another token from the 302 /// lexer and updates CurTok with its results. 303 static int CurTok; 304 static int getNextToken() { return CurTok = gettok(); } 305 306 /// BinopPrecedence - This holds the precedence for each binary operator that is 307 /// defined. 308 static std::map<char, int> BinopPrecedence; 309 310 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 311 static int GetTokPrecedence() { 312 if (!isascii(CurTok)) 313 return -1; 314 315 // Make sure it's a declared binop. 316 int TokPrec = BinopPrecedence[CurTok]; 317 if (TokPrec <= 0) 318 return -1; 319 return TokPrec; 320 } 321 322 /// LogError* - These are little helper functions for error handling. 323 std::unique_ptr<ExprAST> LogError(const char *Str) { 324 fprintf(stderr, "Error: %s\n", Str); 325 return nullptr; 326 } 327 328 std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { 329 LogError(Str); 330 return nullptr; 331 } 332 333 static std::unique_ptr<ExprAST> ParseExpression(); 334 335 /// numberexpr ::= number 336 static std::unique_ptr<ExprAST> ParseNumberExpr() { 337 auto Result = std::make_unique<NumberExprAST>(NumVal); 338 getNextToken(); // consume the number 339 return std::move(Result); 340 } 341 342 /// parenexpr ::= '(' expression ')' 343 static std::unique_ptr<ExprAST> ParseParenExpr() { 344 getNextToken(); // eat (. 345 auto V = ParseExpression(); 346 if (!V) 347 return nullptr; 348 349 if (CurTok != ')') 350 return LogError("expected ')'"); 351 getNextToken(); // eat ). 352 return V; 353 } 354 355 /// identifierexpr 356 /// ::= identifier 357 /// ::= identifier '(' expression* ')' 358 static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 359 std::string IdName = IdentifierStr; 360 361 getNextToken(); // eat identifier. 362 363 if (CurTok != '(') // Simple variable ref. 364 return std::make_unique<VariableExprAST>(IdName); 365 366 // Call. 367 getNextToken(); // eat ( 368 std::vector<std::unique_ptr<ExprAST>> Args; 369 if (CurTok != ')') { 370 while (true) { 371 if (auto Arg = ParseExpression()) 372 Args.push_back(std::move(Arg)); 373 else 374 return nullptr; 375 376 if (CurTok == ')') 377 break; 378 379 if (CurTok != ',') 380 return LogError("Expected ')' or ',' in argument list"); 381 getNextToken(); 382 } 383 } 384 385 // Eat the ')'. 386 getNextToken(); 387 388 return std::make_unique<CallExprAST>(IdName, std::move(Args)); 389 } 390 391 /// ifexpr ::= 'if' expression 'then' expression 'else' expression 392 static std::unique_ptr<ExprAST> ParseIfExpr() { 393 getNextToken(); // eat the if. 394 395 // condition. 396 auto Cond = ParseExpression(); 397 if (!Cond) 398 return nullptr; 399 400 if (CurTok != tok_then) 401 return LogError("expected then"); 402 getNextToken(); // eat the then 403 404 auto Then = ParseExpression(); 405 if (!Then) 406 return nullptr; 407 408 if (CurTok != tok_else) 409 return LogError("expected else"); 410 411 getNextToken(); 412 413 auto Else = ParseExpression(); 414 if (!Else) 415 return nullptr; 416 417 return std::make_unique<IfExprAST>(std::move(Cond), std::move(Then), 418 std::move(Else)); 419 } 420 421 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression 422 static std::unique_ptr<ExprAST> ParseForExpr() { 423 getNextToken(); // eat the for. 424 425 if (CurTok != tok_identifier) 426 return LogError("expected identifier after for"); 427 428 std::string IdName = IdentifierStr; 429 getNextToken(); // eat identifier. 430 431 if (CurTok != '=') 432 return LogError("expected '=' after for"); 433 getNextToken(); // eat '='. 434 435 auto Start = ParseExpression(); 436 if (!Start) 437 return nullptr; 438 if (CurTok != ',') 439 return LogError("expected ',' after for start value"); 440 getNextToken(); 441 442 auto End = ParseExpression(); 443 if (!End) 444 return nullptr; 445 446 // The step value is optional. 447 std::unique_ptr<ExprAST> Step; 448 if (CurTok == ',') { 449 getNextToken(); 450 Step = ParseExpression(); 451 if (!Step) 452 return nullptr; 453 } 454 455 if (CurTok != tok_in) 456 return LogError("expected 'in' after for"); 457 getNextToken(); // eat 'in'. 458 459 auto Body = ParseExpression(); 460 if (!Body) 461 return nullptr; 462 463 return std::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), 464 std::move(Step), std::move(Body)); 465 } 466 467 /// varexpr ::= 'var' identifier ('=' expression)? 468 // (',' identifier ('=' expression)?)* 'in' expression 469 static std::unique_ptr<ExprAST> ParseVarExpr() { 470 getNextToken(); // eat the var. 471 472 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 473 474 // At least one variable name is required. 475 if (CurTok != tok_identifier) 476 return LogError("expected identifier after var"); 477 478 while (true) { 479 std::string Name = IdentifierStr; 480 getNextToken(); // eat identifier. 481 482 // Read the optional initializer. 483 std::unique_ptr<ExprAST> Init = nullptr; 484 if (CurTok == '=') { 485 getNextToken(); // eat the '='. 486 487 Init = ParseExpression(); 488 if (!Init) 489 return nullptr; 490 } 491 492 VarNames.push_back(std::make_pair(Name, std::move(Init))); 493 494 // End of var list, exit loop. 495 if (CurTok != ',') 496 break; 497 getNextToken(); // eat the ','. 498 499 if (CurTok != tok_identifier) 500 return LogError("expected identifier list after var"); 501 } 502 503 // At this point, we have to have 'in'. 504 if (CurTok != tok_in) 505 return LogError("expected 'in' keyword after 'var'"); 506 getNextToken(); // eat 'in'. 507 508 auto Body = ParseExpression(); 509 if (!Body) 510 return nullptr; 511 512 return std::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); 513 } 514 515 /// primary 516 /// ::= identifierexpr 517 /// ::= numberexpr 518 /// ::= parenexpr 519 /// ::= ifexpr 520 /// ::= forexpr 521 /// ::= varexpr 522 static std::unique_ptr<ExprAST> ParsePrimary() { 523 switch (CurTok) { 524 default: 525 return LogError("unknown token when expecting an expression"); 526 case tok_identifier: 527 return ParseIdentifierExpr(); 528 case tok_number: 529 return ParseNumberExpr(); 530 case '(': 531 return ParseParenExpr(); 532 case tok_if: 533 return ParseIfExpr(); 534 case tok_for: 535 return ParseForExpr(); 536 case tok_var: 537 return ParseVarExpr(); 538 } 539 } 540 541 /// unary 542 /// ::= primary 543 /// ::= '!' unary 544 static std::unique_ptr<ExprAST> ParseUnary() { 545 // If the current token is not an operator, it must be a primary expr. 546 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') 547 return ParsePrimary(); 548 549 // If this is a unary operator, read it. 550 int Opc = CurTok; 551 getNextToken(); 552 if (auto Operand = ParseUnary()) 553 return std::make_unique<UnaryExprAST>(Opc, std::move(Operand)); 554 return nullptr; 555 } 556 557 /// binoprhs 558 /// ::= ('+' unary)* 559 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 560 std::unique_ptr<ExprAST> LHS) { 561 // If this is a binop, find its precedence. 562 while (true) { 563 int TokPrec = GetTokPrecedence(); 564 565 // If this is a binop that binds at least as tightly as the current binop, 566 // consume it, otherwise we are done. 567 if (TokPrec < ExprPrec) 568 return LHS; 569 570 // Okay, we know this is a binop. 571 int BinOp = CurTok; 572 getNextToken(); // eat binop 573 574 // Parse the unary expression after the binary operator. 575 auto RHS = ParseUnary(); 576 if (!RHS) 577 return nullptr; 578 579 // If BinOp binds less tightly with RHS than the operator after RHS, let 580 // the pending operator take RHS as its LHS. 581 int NextPrec = GetTokPrecedence(); 582 if (TokPrec < NextPrec) { 583 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); 584 if (!RHS) 585 return nullptr; 586 } 587 588 // Merge LHS/RHS. 589 LHS = 590 std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); 591 } 592 } 593 594 /// expression 595 /// ::= unary binoprhs 596 /// 597 static std::unique_ptr<ExprAST> ParseExpression() { 598 auto LHS = ParseUnary(); 599 if (!LHS) 600 return nullptr; 601 602 return ParseBinOpRHS(0, std::move(LHS)); 603 } 604 605 /// prototype 606 /// ::= id '(' id* ')' 607 /// ::= binary LETTER number? (id, id) 608 /// ::= unary LETTER (id) 609 static std::unique_ptr<PrototypeAST> ParsePrototype() { 610 std::string FnName; 611 612 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 613 unsigned BinaryPrecedence = 30; 614 615 switch (CurTok) { 616 default: 617 return LogErrorP("Expected function name in prototype"); 618 case tok_identifier: 619 FnName = IdentifierStr; 620 Kind = 0; 621 getNextToken(); 622 break; 623 case tok_unary: 624 getNextToken(); 625 if (!isascii(CurTok)) 626 return LogErrorP("Expected unary operator"); 627 FnName = "unary"; 628 FnName += (char)CurTok; 629 Kind = 1; 630 getNextToken(); 631 break; 632 case tok_binary: 633 getNextToken(); 634 if (!isascii(CurTok)) 635 return LogErrorP("Expected binary operator"); 636 FnName = "binary"; 637 FnName += (char)CurTok; 638 Kind = 2; 639 getNextToken(); 640 641 // Read the precedence if present. 642 if (CurTok == tok_number) { 643 if (NumVal < 1 || NumVal > 100) 644 return LogErrorP("Invalid precedence: must be 1..100"); 645 BinaryPrecedence = (unsigned)NumVal; 646 getNextToken(); 647 } 648 break; 649 } 650 651 if (CurTok != '(') 652 return LogErrorP("Expected '(' in prototype"); 653 654 std::vector<std::string> ArgNames; 655 while (getNextToken() == tok_identifier) 656 ArgNames.push_back(IdentifierStr); 657 if (CurTok != ')') 658 return LogErrorP("Expected ')' in prototype"); 659 660 // success. 661 getNextToken(); // eat ')'. 662 663 // Verify right number of names for operator. 664 if (Kind && ArgNames.size() != Kind) 665 return LogErrorP("Invalid number of operands for operator"); 666 667 return std::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0, 668 BinaryPrecedence); 669 } 670 671 /// definition ::= 'def' prototype expression 672 static std::unique_ptr<FunctionAST> ParseDefinition() { 673 getNextToken(); // eat def. 674 auto Proto = ParsePrototype(); 675 if (!Proto) 676 return nullptr; 677 678 if (auto E = ParseExpression()) 679 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 680 return nullptr; 681 } 682 683 /// toplevelexpr ::= expression 684 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { 685 if (auto E = ParseExpression()) { 686 // Make an anonymous proto. 687 auto Proto = std::make_unique<PrototypeAST>("__anon_expr", 688 std::vector<std::string>()); 689 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 690 } 691 return nullptr; 692 } 693 694 /// external ::= 'extern' prototype 695 static std::unique_ptr<PrototypeAST> ParseExtern() { 696 getNextToken(); // eat extern. 697 return ParsePrototype(); 698 } 699 700 //===----------------------------------------------------------------------===// 701 // Code Generation 702 //===----------------------------------------------------------------------===// 703 704 static std::unique_ptr<LLVMContext> TheContext; 705 static std::unique_ptr<Module> TheModule; 706 static std::unique_ptr<IRBuilder<>> Builder; 707 static std::map<std::string, AllocaInst *> NamedValues; 708 static std::unique_ptr<legacy::FunctionPassManager> TheFPM; 709 static std::unique_ptr<KaleidoscopeJIT> TheJIT; 710 static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; 711 static ExitOnError ExitOnErr; 712 713 Value *LogErrorV(const char *Str) { 714 LogError(Str); 715 return nullptr; 716 } 717 718 Function *getFunction(std::string Name) { 719 // First, see if the function has already been added to the current module. 720 if (auto *F = TheModule->getFunction(Name)) 721 return F; 722 723 // If not, check whether we can codegen the declaration from some existing 724 // prototype. 725 auto FI = FunctionProtos.find(Name); 726 if (FI != FunctionProtos.end()) 727 return FI->second->codegen(); 728 729 // If no existing prototype exists, return null. 730 return nullptr; 731 } 732 733 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of 734 /// the function. This is used for mutable variables etc. 735 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, 736 StringRef VarName) { 737 IRBuilder<> TmpB(&TheFunction->getEntryBlock(), 738 TheFunction->getEntryBlock().begin()); 739 return TmpB.CreateAlloca(Type::getDoubleTy(*TheContext), nullptr, VarName); 740 } 741 742 Value *NumberExprAST::codegen() { 743 return ConstantFP::get(*TheContext, APFloat(Val)); 744 } 745 746 Value *VariableExprAST::codegen() { 747 // Look this variable up in the function. 748 AllocaInst *A = NamedValues[Name]; 749 if (!A) 750 return LogErrorV("Unknown variable name"); 751 752 // Load the value. 753 return Builder->CreateLoad(A->getAllocatedType(), A, Name.c_str()); 754 } 755 756 Value *UnaryExprAST::codegen() { 757 Value *OperandV = Operand->codegen(); 758 if (!OperandV) 759 return nullptr; 760 761 Function *F = getFunction(std::string("unary") + Opcode); 762 if (!F) 763 return LogErrorV("Unknown unary operator"); 764 765 return Builder->CreateCall(F, OperandV, "unop"); 766 } 767 768 Value *BinaryExprAST::codegen() { 769 // Special case '=' because we don't want to emit the LHS as an expression. 770 if (Op == '=') { 771 // Assignment requires the LHS to be an identifier. 772 // This assume we're building without RTTI because LLVM builds that way by 773 // default. If you build LLVM with RTTI this can be changed to a 774 // dynamic_cast for automatic error checking. 775 VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); 776 if (!LHSE) 777 return LogErrorV("destination of '=' must be a variable"); 778 // Codegen the RHS. 779 Value *Val = RHS->codegen(); 780 if (!Val) 781 return nullptr; 782 783 // Look up the name. 784 Value *Variable = NamedValues[LHSE->getName()]; 785 if (!Variable) 786 return LogErrorV("Unknown variable name"); 787 788 Builder->CreateStore(Val, Variable); 789 return Val; 790 } 791 792 Value *L = LHS->codegen(); 793 Value *R = RHS->codegen(); 794 if (!L || !R) 795 return nullptr; 796 797 switch (Op) { 798 case '+': 799 return Builder->CreateFAdd(L, R, "addtmp"); 800 case '-': 801 return Builder->CreateFSub(L, R, "subtmp"); 802 case '*': 803 return Builder->CreateFMul(L, R, "multmp"); 804 case '<': 805 L = Builder->CreateFCmpULT(L, R, "cmptmp"); 806 // Convert bool 0/1 to double 0.0 or 1.0 807 return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp"); 808 default: 809 break; 810 } 811 812 // If it wasn't a builtin binary operator, it must be a user defined one. Emit 813 // a call to it. 814 Function *F = getFunction(std::string("binary") + Op); 815 assert(F && "binary operator not found!"); 816 817 Value *Ops[] = {L, R}; 818 return Builder->CreateCall(F, Ops, "binop"); 819 } 820 821 Value *CallExprAST::codegen() { 822 // Look up the name in the global module table. 823 Function *CalleeF = getFunction(Callee); 824 if (!CalleeF) 825 return LogErrorV("Unknown function referenced"); 826 827 // If argument mismatch error. 828 if (CalleeF->arg_size() != Args.size()) 829 return LogErrorV("Incorrect # arguments passed"); 830 831 std::vector<Value *> ArgsV; 832 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 833 ArgsV.push_back(Args[i]->codegen()); 834 if (!ArgsV.back()) 835 return nullptr; 836 } 837 838 return Builder->CreateCall(CalleeF, ArgsV, "calltmp"); 839 } 840 841 Value *IfExprAST::codegen() { 842 Value *CondV = Cond->codegen(); 843 if (!CondV) 844 return nullptr; 845 846 // Convert condition to a bool by comparing non-equal to 0.0. 847 CondV = Builder->CreateFCmpONE( 848 CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond"); 849 850 Function *TheFunction = Builder->GetInsertBlock()->getParent(); 851 852 // Create blocks for the then and else cases. Insert the 'then' block at the 853 // end of the function. 854 BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction); 855 BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else"); 856 BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont"); 857 858 Builder->CreateCondBr(CondV, ThenBB, ElseBB); 859 860 // Emit then value. 861 Builder->SetInsertPoint(ThenBB); 862 863 Value *ThenV = Then->codegen(); 864 if (!ThenV) 865 return nullptr; 866 867 Builder->CreateBr(MergeBB); 868 // Codegen of 'Then' can change the current block, update ThenBB for the PHI. 869 ThenBB = Builder->GetInsertBlock(); 870 871 // Emit else block. 872 TheFunction->getBasicBlockList().push_back(ElseBB); 873 Builder->SetInsertPoint(ElseBB); 874 875 Value *ElseV = Else->codegen(); 876 if (!ElseV) 877 return nullptr; 878 879 Builder->CreateBr(MergeBB); 880 // Codegen of 'Else' can change the current block, update ElseBB for the PHI. 881 ElseBB = Builder->GetInsertBlock(); 882 883 // Emit merge block. 884 TheFunction->getBasicBlockList().push_back(MergeBB); 885 Builder->SetInsertPoint(MergeBB); 886 PHINode *PN = Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, "iftmp"); 887 888 PN->addIncoming(ThenV, ThenBB); 889 PN->addIncoming(ElseV, ElseBB); 890 return PN; 891 } 892 893 // Output for-loop as: 894 // var = alloca double 895 // ... 896 // start = startexpr 897 // store start -> var 898 // goto loop 899 // loop: 900 // ... 901 // bodyexpr 902 // ... 903 // loopend: 904 // step = stepexpr 905 // endcond = endexpr 906 // 907 // curvar = load var 908 // nextvar = curvar + step 909 // store nextvar -> var 910 // br endcond, loop, endloop 911 // outloop: 912 Value *ForExprAST::codegen() { 913 Function *TheFunction = Builder->GetInsertBlock()->getParent(); 914 915 // Create an alloca for the variable in the entry block. 916 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 917 918 // Emit the start code first, without 'variable' in scope. 919 Value *StartVal = Start->codegen(); 920 if (!StartVal) 921 return nullptr; 922 923 // Store the value into the alloca. 924 Builder->CreateStore(StartVal, Alloca); 925 926 // Make the new basic block for the loop header, inserting after current 927 // block. 928 BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction); 929 930 // Insert an explicit fall through from the current block to the LoopBB. 931 Builder->CreateBr(LoopBB); 932 933 // Start insertion in LoopBB. 934 Builder->SetInsertPoint(LoopBB); 935 936 // Within the loop, the variable is defined equal to the PHI node. If it 937 // shadows an existing variable, we have to restore it, so save it now. 938 AllocaInst *OldVal = NamedValues[VarName]; 939 NamedValues[VarName] = Alloca; 940 941 // Emit the body of the loop. This, like any other expr, can change the 942 // current BB. Note that we ignore the value computed by the body, but don't 943 // allow an error. 944 if (!Body->codegen()) 945 return nullptr; 946 947 // Emit the step value. 948 Value *StepVal = nullptr; 949 if (Step) { 950 StepVal = Step->codegen(); 951 if (!StepVal) 952 return nullptr; 953 } else { 954 // If not specified, use 1.0. 955 StepVal = ConstantFP::get(*TheContext, APFloat(1.0)); 956 } 957 958 // Compute the end condition. 959 Value *EndCond = End->codegen(); 960 if (!EndCond) 961 return nullptr; 962 963 // Reload, increment, and restore the alloca. This handles the case where 964 // the body of the loop mutates the variable. 965 Value *CurVar = 966 Builder->CreateLoad(Alloca->getAllocatedType(), Alloca, VarName.c_str()); 967 Value *NextVar = Builder->CreateFAdd(CurVar, StepVal, "nextvar"); 968 Builder->CreateStore(NextVar, Alloca); 969 970 // Convert condition to a bool by comparing non-equal to 0.0. 971 EndCond = Builder->CreateFCmpONE( 972 EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond"); 973 974 // Create the "after loop" block and insert it. 975 BasicBlock *AfterBB = 976 BasicBlock::Create(*TheContext, "afterloop", TheFunction); 977 978 // Insert the conditional branch into the end of LoopEndBB. 979 Builder->CreateCondBr(EndCond, LoopBB, AfterBB); 980 981 // Any new code will be inserted in AfterBB. 982 Builder->SetInsertPoint(AfterBB); 983 984 // Restore the unshadowed variable. 985 if (OldVal) 986 NamedValues[VarName] = OldVal; 987 else 988 NamedValues.erase(VarName); 989 990 // for expr always returns 0.0. 991 return Constant::getNullValue(Type::getDoubleTy(*TheContext)); 992 } 993 994 Value *VarExprAST::codegen() { 995 std::vector<AllocaInst *> OldBindings; 996 997 Function *TheFunction = Builder->GetInsertBlock()->getParent(); 998 999 // Register all variables and emit their initializer. 1000 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { 1001 const std::string &VarName = VarNames[i].first; 1002 ExprAST *Init = VarNames[i].second.get(); 1003 1004 // Emit the initializer before adding the variable to scope, this prevents 1005 // the initializer from referencing the variable itself, and permits stuff 1006 // like this: 1007 // var a = 1 in 1008 // var a = a in ... # refers to outer 'a'. 1009 Value *InitVal; 1010 if (Init) { 1011 InitVal = Init->codegen(); 1012 if (!InitVal) 1013 return nullptr; 1014 } else { // If not specified, use 0.0. 1015 InitVal = ConstantFP::get(*TheContext, APFloat(0.0)); 1016 } 1017 1018 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 1019 Builder->CreateStore(InitVal, Alloca); 1020 1021 // Remember the old variable binding so that we can restore the binding when 1022 // we unrecurse. 1023 OldBindings.push_back(NamedValues[VarName]); 1024 1025 // Remember this binding. 1026 NamedValues[VarName] = Alloca; 1027 } 1028 1029 // Codegen the body, now that all vars are in scope. 1030 Value *BodyVal = Body->codegen(); 1031 if (!BodyVal) 1032 return nullptr; 1033 1034 // Pop all our variables from scope. 1035 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) 1036 NamedValues[VarNames[i].first] = OldBindings[i]; 1037 1038 // Return the body computation. 1039 return BodyVal; 1040 } 1041 1042 Function *PrototypeAST::codegen() { 1043 // Make the function type: double(double,double) etc. 1044 std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(*TheContext)); 1045 FunctionType *FT = 1046 FunctionType::get(Type::getDoubleTy(*TheContext), Doubles, false); 1047 1048 Function *F = 1049 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); 1050 1051 // Set names for all arguments. 1052 unsigned Idx = 0; 1053 for (auto &Arg : F->args()) 1054 Arg.setName(Args[Idx++]); 1055 1056 return F; 1057 } 1058 1059 Function *FunctionAST::codegen() { 1060 // Transfer ownership of the prototype to the FunctionProtos map, but keep a 1061 // reference to it for use below. 1062 auto &P = *Proto; 1063 FunctionProtos[Proto->getName()] = std::move(Proto); 1064 Function *TheFunction = getFunction(P.getName()); 1065 if (!TheFunction) 1066 return nullptr; 1067 1068 // If this is an operator, install it. 1069 if (P.isBinaryOp()) 1070 BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); 1071 1072 // Create a new basic block to start insertion into. 1073 BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction); 1074 Builder->SetInsertPoint(BB); 1075 1076 // Record the function arguments in the NamedValues map. 1077 NamedValues.clear(); 1078 for (auto &Arg : TheFunction->args()) { 1079 // Create an alloca for this variable. 1080 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); 1081 1082 // Store the initial value into the alloca. 1083 Builder->CreateStore(&Arg, Alloca); 1084 1085 // Add arguments to variable symbol table. 1086 NamedValues[std::string(Arg.getName())] = Alloca; 1087 } 1088 1089 if (Value *RetVal = Body->codegen()) { 1090 // Finish off the function. 1091 Builder->CreateRet(RetVal); 1092 1093 // Validate the generated code, checking for consistency. 1094 verifyFunction(*TheFunction); 1095 1096 // Run the optimizer on the function. 1097 TheFPM->run(*TheFunction); 1098 1099 return TheFunction; 1100 } 1101 1102 // Error reading body, remove function. 1103 TheFunction->eraseFromParent(); 1104 1105 if (P.isBinaryOp()) 1106 BinopPrecedence.erase(P.getOperatorName()); 1107 return nullptr; 1108 } 1109 1110 //===----------------------------------------------------------------------===// 1111 // Top-Level parsing and JIT Driver 1112 //===----------------------------------------------------------------------===// 1113 1114 static void InitializeModuleAndPassManager() { 1115 // Open a new module. 1116 TheContext = std::make_unique<LLVMContext>(); 1117 TheModule = std::make_unique<Module>("my cool jit", *TheContext); 1118 TheModule->setDataLayout(TheJIT->getDataLayout()); 1119 1120 // Create a new builder for the module. 1121 Builder = std::make_unique<IRBuilder<>>(*TheContext); 1122 1123 // Create a new pass manager attached to it. 1124 TheFPM = std::make_unique<legacy::FunctionPassManager>(TheModule.get()); 1125 1126 // Promote allocas to registers. 1127 TheFPM->add(createPromoteMemoryToRegisterPass()); 1128 // Do simple "peephole" optimizations and bit-twiddling optzns. 1129 TheFPM->add(createInstructionCombiningPass()); 1130 // Reassociate expressions. 1131 TheFPM->add(createReassociatePass()); 1132 // Eliminate Common SubExpressions. 1133 TheFPM->add(createGVNPass()); 1134 // Simplify the control flow graph (deleting unreachable blocks, etc). 1135 TheFPM->add(createCFGSimplificationPass()); 1136 1137 TheFPM->doInitialization(); 1138 } 1139 1140 static void HandleDefinition() { 1141 if (auto FnAST = ParseDefinition()) { 1142 if (auto *FnIR = FnAST->codegen()) { 1143 fprintf(stderr, "Read function definition:"); 1144 FnIR->print(errs()); 1145 fprintf(stderr, "\n"); 1146 ExitOnErr(TheJIT->addModule( 1147 ThreadSafeModule(std::move(TheModule), std::move(TheContext)))); 1148 InitializeModuleAndPassManager(); 1149 } 1150 } else { 1151 // Skip token for error recovery. 1152 getNextToken(); 1153 } 1154 } 1155 1156 static void HandleExtern() { 1157 if (auto ProtoAST = ParseExtern()) { 1158 if (auto *FnIR = ProtoAST->codegen()) { 1159 fprintf(stderr, "Read extern: "); 1160 FnIR->print(errs()); 1161 fprintf(stderr, "\n"); 1162 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); 1163 } 1164 } else { 1165 // Skip token for error recovery. 1166 getNextToken(); 1167 } 1168 } 1169 1170 static void HandleTopLevelExpression() { 1171 // Evaluate a top-level expression into an anonymous function. 1172 if (auto FnAST = ParseTopLevelExpr()) { 1173 if (FnAST->codegen()) { 1174 // Create a ResourceTracker to track JIT'd memory allocated to our 1175 // anonymous expression -- that way we can free it after executing. 1176 auto RT = TheJIT->getMainJITDylib().createResourceTracker(); 1177 1178 auto TSM = ThreadSafeModule(std::move(TheModule), std::move(TheContext)); 1179 ExitOnErr(TheJIT->addModule(std::move(TSM), RT)); 1180 InitializeModuleAndPassManager(); 1181 1182 // Search the JIT for the __anon_expr symbol. 1183 auto ExprSymbol = ExitOnErr(TheJIT->lookup("__anon_expr")); 1184 1185 // Get the symbol's address and cast it to the right type (takes no 1186 // arguments, returns a double) so we can call it as a native function. 1187 double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); 1188 fprintf(stderr, "Evaluated to %f\n", FP()); 1189 1190 // Delete the anonymous expression module from the JIT. 1191 ExitOnErr(RT->remove()); 1192 } 1193 } else { 1194 // Skip token for error recovery. 1195 getNextToken(); 1196 } 1197 } 1198 1199 /// top ::= definition | external | expression | ';' 1200 static void MainLoop() { 1201 while (true) { 1202 fprintf(stderr, "ready> "); 1203 switch (CurTok) { 1204 case tok_eof: 1205 return; 1206 case ';': // ignore top-level semicolons. 1207 getNextToken(); 1208 break; 1209 case tok_def: 1210 HandleDefinition(); 1211 break; 1212 case tok_extern: 1213 HandleExtern(); 1214 break; 1215 default: 1216 HandleTopLevelExpression(); 1217 break; 1218 } 1219 } 1220 } 1221 1222 //===----------------------------------------------------------------------===// 1223 // "Library" functions that can be "extern'd" from user code. 1224 //===----------------------------------------------------------------------===// 1225 1226 #ifdef _WIN32 1227 #define DLLEXPORT __declspec(dllexport) 1228 #else 1229 #define DLLEXPORT 1230 #endif 1231 1232 /// putchard - putchar that takes a double and returns 0. 1233 extern "C" DLLEXPORT double putchard(double X) { 1234 fputc((char)X, stderr); 1235 return 0; 1236 } 1237 1238 /// printd - printf that takes a double prints it as "%f\n", returning 0. 1239 extern "C" DLLEXPORT double printd(double X) { 1240 fprintf(stderr, "%f\n", X); 1241 return 0; 1242 } 1243 1244 //===----------------------------------------------------------------------===// 1245 // Main driver code. 1246 //===----------------------------------------------------------------------===// 1247 1248 int main() { 1249 InitializeNativeTarget(); 1250 InitializeNativeTargetAsmPrinter(); 1251 InitializeNativeTargetAsmParser(); 1252 1253 // Install standard binary operators. 1254 // 1 is lowest precedence. 1255 BinopPrecedence['='] = 2; 1256 BinopPrecedence['<'] = 10; 1257 BinopPrecedence['+'] = 20; 1258 BinopPrecedence['-'] = 20; 1259 BinopPrecedence['*'] = 40; // highest. 1260 1261 // Prime the first token. 1262 fprintf(stderr, "ready> "); 1263 getNextToken(); 1264 1265 TheJIT = ExitOnErr(KaleidoscopeJIT::Create()); 1266 1267 InitializeModuleAndPassManager(); 1268 1269 // Run the main "interpreter loop" now. 1270 MainLoop(); 1271 1272 return 0; 1273 } 1274