1 #include "llvm/ADT/APFloat.h" 2 #include "llvm/ADT/STLExtras.h" 3 #include "llvm/IR/BasicBlock.h" 4 #include "llvm/IR/Constants.h" 5 #include "llvm/IR/DerivedTypes.h" 6 #include "llvm/IR/Function.h" 7 #include "llvm/IR/Instructions.h" 8 #include "llvm/IR/IRBuilder.h" 9 #include "llvm/IR/LLVMContext.h" 10 #include "llvm/IR/Module.h" 11 #include "llvm/IR/Type.h" 12 #include "llvm/IR/Verifier.h" 13 #include "llvm/Support/Error.h" 14 #include "llvm/Support/ErrorHandling.h" 15 #include "llvm/Support/TargetSelect.h" 16 #include "llvm/Target/TargetMachine.h" 17 #include "KaleidoscopeJIT.h" 18 #include <algorithm> 19 #include <cassert> 20 #include <cctype> 21 #include <cstdint> 22 #include <cstdio> 23 #include <cstdlib> 24 #include <map> 25 #include <memory> 26 #include <string> 27 #include <utility> 28 #include <vector> 29 30 using namespace llvm; 31 using namespace llvm::orc; 32 33 //===----------------------------------------------------------------------===// 34 // Lexer 35 //===----------------------------------------------------------------------===// 36 37 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 38 // of these for known things. 39 enum Token { 40 tok_eof = -1, 41 42 // commands 43 tok_def = -2, 44 tok_extern = -3, 45 46 // primary 47 tok_identifier = -4, 48 tok_number = -5, 49 50 // control 51 tok_if = -6, 52 tok_then = -7, 53 tok_else = -8, 54 tok_for = -9, 55 tok_in = -10, 56 57 // operators 58 tok_binary = -11, 59 tok_unary = -12, 60 61 // var definition 62 tok_var = -13 63 }; 64 65 static std::string IdentifierStr; // Filled in if tok_identifier 66 static double NumVal; // Filled in if tok_number 67 68 /// gettok - Return the next token from standard input. 69 static int gettok() { 70 static int LastChar = ' '; 71 72 // Skip any whitespace. 73 while (isspace(LastChar)) 74 LastChar = getchar(); 75 76 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 77 IdentifierStr = LastChar; 78 while (isalnum((LastChar = getchar()))) 79 IdentifierStr += LastChar; 80 81 if (IdentifierStr == "def") 82 return tok_def; 83 if (IdentifierStr == "extern") 84 return tok_extern; 85 if (IdentifierStr == "if") 86 return tok_if; 87 if (IdentifierStr == "then") 88 return tok_then; 89 if (IdentifierStr == "else") 90 return tok_else; 91 if (IdentifierStr == "for") 92 return tok_for; 93 if (IdentifierStr == "in") 94 return tok_in; 95 if (IdentifierStr == "binary") 96 return tok_binary; 97 if (IdentifierStr == "unary") 98 return tok_unary; 99 if (IdentifierStr == "var") 100 return tok_var; 101 return tok_identifier; 102 } 103 104 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 105 std::string NumStr; 106 do { 107 NumStr += LastChar; 108 LastChar = getchar(); 109 } while (isdigit(LastChar) || LastChar == '.'); 110 111 NumVal = strtod(NumStr.c_str(), nullptr); 112 return tok_number; 113 } 114 115 if (LastChar == '#') { 116 // Comment until end of line. 117 do 118 LastChar = getchar(); 119 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 120 121 if (LastChar != EOF) 122 return gettok(); 123 } 124 125 // Check for end of file. Don't eat the EOF. 126 if (LastChar == EOF) 127 return tok_eof; 128 129 // Otherwise, just return the character as its ascii value. 130 int ThisChar = LastChar; 131 LastChar = getchar(); 132 return ThisChar; 133 } 134 135 //===----------------------------------------------------------------------===// 136 // Abstract Syntax Tree (aka Parse Tree) 137 //===----------------------------------------------------------------------===// 138 139 /// ExprAST - Base class for all expression nodes. 140 class ExprAST { 141 public: 142 virtual ~ExprAST() = default; 143 144 virtual Value *codegen() = 0; 145 }; 146 147 /// NumberExprAST - Expression class for numeric literals like "1.0". 148 class NumberExprAST : public ExprAST { 149 double Val; 150 151 public: 152 NumberExprAST(double Val) : Val(Val) {} 153 154 Value *codegen() override; 155 }; 156 157 /// VariableExprAST - Expression class for referencing a variable, like "a". 158 class VariableExprAST : public ExprAST { 159 std::string Name; 160 161 public: 162 VariableExprAST(const std::string &Name) : Name(Name) {} 163 164 Value *codegen() override; 165 const std::string &getName() const { return Name; } 166 }; 167 168 /// UnaryExprAST - Expression class for a unary operator. 169 class UnaryExprAST : public ExprAST { 170 char Opcode; 171 std::unique_ptr<ExprAST> Operand; 172 173 public: 174 UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) 175 : Opcode(Opcode), Operand(std::move(Operand)) {} 176 177 Value *codegen() override; 178 }; 179 180 /// BinaryExprAST - Expression class for a binary operator. 181 class BinaryExprAST : public ExprAST { 182 char Op; 183 std::unique_ptr<ExprAST> LHS, RHS; 184 185 public: 186 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 187 std::unique_ptr<ExprAST> RHS) 188 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 189 190 Value *codegen() override; 191 }; 192 193 /// CallExprAST - Expression class for function calls. 194 class CallExprAST : public ExprAST { 195 std::string Callee; 196 std::vector<std::unique_ptr<ExprAST>> Args; 197 198 public: 199 CallExprAST(const std::string &Callee, 200 std::vector<std::unique_ptr<ExprAST>> Args) 201 : Callee(Callee), Args(std::move(Args)) {} 202 203 Value *codegen() override; 204 }; 205 206 /// IfExprAST - Expression class for if/then/else. 207 class IfExprAST : public ExprAST { 208 std::unique_ptr<ExprAST> Cond, Then, Else; 209 210 public: 211 IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, 212 std::unique_ptr<ExprAST> Else) 213 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} 214 215 Value *codegen() override; 216 }; 217 218 /// ForExprAST - Expression class for for/in. 219 class ForExprAST : public ExprAST { 220 std::string VarName; 221 std::unique_ptr<ExprAST> Start, End, Step, Body; 222 223 public: 224 ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, 225 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, 226 std::unique_ptr<ExprAST> Body) 227 : VarName(VarName), Start(std::move(Start)), End(std::move(End)), 228 Step(std::move(Step)), Body(std::move(Body)) {} 229 230 Value *codegen() override; 231 }; 232 233 /// VarExprAST - Expression class for var/in 234 class VarExprAST : public ExprAST { 235 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 236 std::unique_ptr<ExprAST> Body; 237 238 public: 239 VarExprAST( 240 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, 241 std::unique_ptr<ExprAST> Body) 242 : VarNames(std::move(VarNames)), Body(std::move(Body)) {} 243 244 Value *codegen() override; 245 }; 246 247 /// PrototypeAST - This class represents the "prototype" for a function, 248 /// which captures its name, and its argument names (thus implicitly the number 249 /// of arguments the function takes), as well as if it is an operator. 250 class PrototypeAST { 251 std::string Name; 252 std::vector<std::string> Args; 253 bool IsOperator; 254 unsigned Precedence; // Precedence if a binary op. 255 256 public: 257 PrototypeAST(const std::string &Name, std::vector<std::string> Args, 258 bool IsOperator = false, unsigned Prec = 0) 259 : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), 260 Precedence(Prec) {} 261 262 Function *codegen(); 263 const std::string &getName() const { return Name; } 264 265 bool isUnaryOp() const { return IsOperator && Args.size() == 1; } 266 bool isBinaryOp() const { return IsOperator && Args.size() == 2; } 267 268 char getOperatorName() const { 269 assert(isUnaryOp() || isBinaryOp()); 270 return Name[Name.size() - 1]; 271 } 272 273 unsigned getBinaryPrecedence() const { return Precedence; } 274 }; 275 276 //===----------------------------------------------------------------------===// 277 // Parser 278 //===----------------------------------------------------------------------===// 279 280 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 281 /// token the parser is looking at. getNextToken reads another token from the 282 /// lexer and updates CurTok with its results. 283 static int CurTok; 284 static int getNextToken() { return CurTok = gettok(); } 285 286 /// BinopPrecedence - This holds the precedence for each binary operator that is 287 /// defined. 288 static std::map<char, int> BinopPrecedence; 289 290 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 291 static int GetTokPrecedence() { 292 if (!isascii(CurTok)) 293 return -1; 294 295 // Make sure it's a declared binop. 296 int TokPrec = BinopPrecedence[CurTok]; 297 if (TokPrec <= 0) 298 return -1; 299 return TokPrec; 300 } 301 302 /// LogError* - These are little helper functions for error handling. 303 std::unique_ptr<ExprAST> LogError(const char *Str) { 304 fprintf(stderr, "Error: %s\n", Str); 305 return nullptr; 306 } 307 308 std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { 309 LogError(Str); 310 return nullptr; 311 } 312 313 static std::unique_ptr<ExprAST> ParseExpression(); 314 315 /// numberexpr ::= number 316 static std::unique_ptr<ExprAST> ParseNumberExpr() { 317 auto Result = std::make_unique<NumberExprAST>(NumVal); 318 getNextToken(); // consume the number 319 return std::move(Result); 320 } 321 322 /// parenexpr ::= '(' expression ')' 323 static std::unique_ptr<ExprAST> ParseParenExpr() { 324 getNextToken(); // eat (. 325 auto V = ParseExpression(); 326 if (!V) 327 return nullptr; 328 329 if (CurTok != ')') 330 return LogError("expected ')'"); 331 getNextToken(); // eat ). 332 return V; 333 } 334 335 /// identifierexpr 336 /// ::= identifier 337 /// ::= identifier '(' expression* ')' 338 static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 339 std::string IdName = IdentifierStr; 340 341 getNextToken(); // eat identifier. 342 343 if (CurTok != '(') // Simple variable ref. 344 return std::make_unique<VariableExprAST>(IdName); 345 346 // Call. 347 getNextToken(); // eat ( 348 std::vector<std::unique_ptr<ExprAST>> Args; 349 if (CurTok != ')') { 350 while (true) { 351 if (auto Arg = ParseExpression()) 352 Args.push_back(std::move(Arg)); 353 else 354 return nullptr; 355 356 if (CurTok == ')') 357 break; 358 359 if (CurTok != ',') 360 return LogError("Expected ')' or ',' in argument list"); 361 getNextToken(); 362 } 363 } 364 365 // Eat the ')'. 366 getNextToken(); 367 368 return std::make_unique<CallExprAST>(IdName, std::move(Args)); 369 } 370 371 /// ifexpr ::= 'if' expression 'then' expression 'else' expression 372 static std::unique_ptr<ExprAST> ParseIfExpr() { 373 getNextToken(); // eat the if. 374 375 // condition. 376 auto Cond = ParseExpression(); 377 if (!Cond) 378 return nullptr; 379 380 if (CurTok != tok_then) 381 return LogError("expected then"); 382 getNextToken(); // eat the then 383 384 auto Then = ParseExpression(); 385 if (!Then) 386 return nullptr; 387 388 if (CurTok != tok_else) 389 return LogError("expected else"); 390 391 getNextToken(); 392 393 auto Else = ParseExpression(); 394 if (!Else) 395 return nullptr; 396 397 return std::make_unique<IfExprAST>(std::move(Cond), std::move(Then), 398 std::move(Else)); 399 } 400 401 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression 402 static std::unique_ptr<ExprAST> ParseForExpr() { 403 getNextToken(); // eat the for. 404 405 if (CurTok != tok_identifier) 406 return LogError("expected identifier after for"); 407 408 std::string IdName = IdentifierStr; 409 getNextToken(); // eat identifier. 410 411 if (CurTok != '=') 412 return LogError("expected '=' after for"); 413 getNextToken(); // eat '='. 414 415 auto Start = ParseExpression(); 416 if (!Start) 417 return nullptr; 418 if (CurTok != ',') 419 return LogError("expected ',' after for start value"); 420 getNextToken(); 421 422 auto End = ParseExpression(); 423 if (!End) 424 return nullptr; 425 426 // The step value is optional. 427 std::unique_ptr<ExprAST> Step; 428 if (CurTok == ',') { 429 getNextToken(); 430 Step = ParseExpression(); 431 if (!Step) 432 return nullptr; 433 } 434 435 if (CurTok != tok_in) 436 return LogError("expected 'in' after for"); 437 getNextToken(); // eat 'in'. 438 439 auto Body = ParseExpression(); 440 if (!Body) 441 return nullptr; 442 443 return std::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), 444 std::move(Step), std::move(Body)); 445 } 446 447 /// varexpr ::= 'var' identifier ('=' expression)? 448 // (',' identifier ('=' expression)?)* 'in' expression 449 static std::unique_ptr<ExprAST> ParseVarExpr() { 450 getNextToken(); // eat the var. 451 452 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 453 454 // At least one variable name is required. 455 if (CurTok != tok_identifier) 456 return LogError("expected identifier after var"); 457 458 while (true) { 459 std::string Name = IdentifierStr; 460 getNextToken(); // eat identifier. 461 462 // Read the optional initializer. 463 std::unique_ptr<ExprAST> Init = nullptr; 464 if (CurTok == '=') { 465 getNextToken(); // eat the '='. 466 467 Init = ParseExpression(); 468 if (!Init) 469 return nullptr; 470 } 471 472 VarNames.push_back(std::make_pair(Name, std::move(Init))); 473 474 // End of var list, exit loop. 475 if (CurTok != ',') 476 break; 477 getNextToken(); // eat the ','. 478 479 if (CurTok != tok_identifier) 480 return LogError("expected identifier list after var"); 481 } 482 483 // At this point, we have to have 'in'. 484 if (CurTok != tok_in) 485 return LogError("expected 'in' keyword after 'var'"); 486 getNextToken(); // eat 'in'. 487 488 auto Body = ParseExpression(); 489 if (!Body) 490 return nullptr; 491 492 return std::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); 493 } 494 495 /// primary 496 /// ::= identifierexpr 497 /// ::= numberexpr 498 /// ::= parenexpr 499 /// ::= ifexpr 500 /// ::= forexpr 501 /// ::= varexpr 502 static std::unique_ptr<ExprAST> ParsePrimary() { 503 switch (CurTok) { 504 default: 505 return LogError("unknown token when expecting an expression"); 506 case tok_identifier: 507 return ParseIdentifierExpr(); 508 case tok_number: 509 return ParseNumberExpr(); 510 case '(': 511 return ParseParenExpr(); 512 case tok_if: 513 return ParseIfExpr(); 514 case tok_for: 515 return ParseForExpr(); 516 case tok_var: 517 return ParseVarExpr(); 518 } 519 } 520 521 /// unary 522 /// ::= primary 523 /// ::= '!' unary 524 static std::unique_ptr<ExprAST> ParseUnary() { 525 // If the current token is not an operator, it must be a primary expr. 526 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') 527 return ParsePrimary(); 528 529 // If this is a unary operator, read it. 530 int Opc = CurTok; 531 getNextToken(); 532 if (auto Operand = ParseUnary()) 533 return std::make_unique<UnaryExprAST>(Opc, std::move(Operand)); 534 return nullptr; 535 } 536 537 /// binoprhs 538 /// ::= ('+' unary)* 539 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 540 std::unique_ptr<ExprAST> LHS) { 541 // If this is a binop, find its precedence. 542 while (true) { 543 int TokPrec = GetTokPrecedence(); 544 545 // If this is a binop that binds at least as tightly as the current binop, 546 // consume it, otherwise we are done. 547 if (TokPrec < ExprPrec) 548 return LHS; 549 550 // Okay, we know this is a binop. 551 int BinOp = CurTok; 552 getNextToken(); // eat binop 553 554 // Parse the unary expression after the binary operator. 555 auto RHS = ParseUnary(); 556 if (!RHS) 557 return nullptr; 558 559 // If BinOp binds less tightly with RHS than the operator after RHS, let 560 // the pending operator take RHS as its LHS. 561 int NextPrec = GetTokPrecedence(); 562 if (TokPrec < NextPrec) { 563 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); 564 if (!RHS) 565 return nullptr; 566 } 567 568 // Merge LHS/RHS. 569 LHS = 570 std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); 571 } 572 } 573 574 /// expression 575 /// ::= unary binoprhs 576 /// 577 static std::unique_ptr<ExprAST> ParseExpression() { 578 auto LHS = ParseUnary(); 579 if (!LHS) 580 return nullptr; 581 582 return ParseBinOpRHS(0, std::move(LHS)); 583 } 584 585 /// prototype 586 /// ::= id '(' id* ')' 587 /// ::= binary LETTER number? (id, id) 588 /// ::= unary LETTER (id) 589 static std::unique_ptr<PrototypeAST> ParsePrototype() { 590 std::string FnName; 591 592 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 593 unsigned BinaryPrecedence = 30; 594 595 switch (CurTok) { 596 default: 597 return LogErrorP("Expected function name in prototype"); 598 case tok_identifier: 599 FnName = IdentifierStr; 600 Kind = 0; 601 getNextToken(); 602 break; 603 case tok_unary: 604 getNextToken(); 605 if (!isascii(CurTok)) 606 return LogErrorP("Expected unary operator"); 607 FnName = "unary"; 608 FnName += (char)CurTok; 609 Kind = 1; 610 getNextToken(); 611 break; 612 case tok_binary: 613 getNextToken(); 614 if (!isascii(CurTok)) 615 return LogErrorP("Expected binary operator"); 616 FnName = "binary"; 617 FnName += (char)CurTok; 618 Kind = 2; 619 getNextToken(); 620 621 // Read the precedence if present. 622 if (CurTok == tok_number) { 623 if (NumVal < 1 || NumVal > 100) 624 return LogErrorP("Invalid precedecnce: must be 1..100"); 625 BinaryPrecedence = (unsigned)NumVal; 626 getNextToken(); 627 } 628 break; 629 } 630 631 if (CurTok != '(') 632 return LogErrorP("Expected '(' in prototype"); 633 634 std::vector<std::string> ArgNames; 635 while (getNextToken() == tok_identifier) 636 ArgNames.push_back(IdentifierStr); 637 if (CurTok != ')') 638 return LogErrorP("Expected ')' in prototype"); 639 640 // success. 641 getNextToken(); // eat ')'. 642 643 // Verify right number of names for operator. 644 if (Kind && ArgNames.size() != Kind) 645 return LogErrorP("Invalid number of operands for operator"); 646 647 return std::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0, 648 BinaryPrecedence); 649 } 650 651 /// definition ::= 'def' prototype expression 652 static std::unique_ptr<FunctionAST> ParseDefinition() { 653 getNextToken(); // eat def. 654 auto Proto = ParsePrototype(); 655 if (!Proto) 656 return nullptr; 657 658 if (auto E = ParseExpression()) 659 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 660 return nullptr; 661 } 662 663 /// toplevelexpr ::= expression 664 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { 665 if (auto E = ParseExpression()) { 666 // Make an anonymous proto. 667 auto Proto = std::make_unique<PrototypeAST>("__anon_expr", 668 std::vector<std::string>()); 669 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 670 } 671 return nullptr; 672 } 673 674 /// external ::= 'extern' prototype 675 static std::unique_ptr<PrototypeAST> ParseExtern() { 676 getNextToken(); // eat extern. 677 return ParsePrototype(); 678 } 679 680 //===----------------------------------------------------------------------===// 681 // Code Generation 682 //===----------------------------------------------------------------------===// 683 684 static LLVMContext TheContext; 685 static IRBuilder<> Builder(TheContext); 686 static std::unique_ptr<Module> TheModule; 687 static std::map<std::string, AllocaInst *> NamedValues; 688 static std::unique_ptr<KaleidoscopeJIT> TheJIT; 689 static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; 690 static ExitOnError ExitOnErr; 691 692 Value *LogErrorV(const char *Str) { 693 LogError(Str); 694 return nullptr; 695 } 696 697 Function *getFunction(std::string Name) { 698 // First, see if the function has already been added to the current module. 699 if (auto *F = TheModule->getFunction(Name)) 700 return F; 701 702 // If not, check whether we can codegen the declaration from some existing 703 // prototype. 704 auto FI = FunctionProtos.find(Name); 705 if (FI != FunctionProtos.end()) 706 return FI->second->codegen(); 707 708 // If no existing prototype exists, return null. 709 return nullptr; 710 } 711 712 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of 713 /// the function. This is used for mutable variables etc. 714 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, 715 const std::string &VarName) { 716 IRBuilder<> TmpB(&TheFunction->getEntryBlock(), 717 TheFunction->getEntryBlock().begin()); 718 return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), nullptr, VarName); 719 } 720 721 Value *NumberExprAST::codegen() { 722 return ConstantFP::get(TheContext, APFloat(Val)); 723 } 724 725 Value *VariableExprAST::codegen() { 726 // Look this variable up in the function. 727 Value *V = NamedValues[Name]; 728 if (!V) 729 return LogErrorV("Unknown variable name"); 730 731 // Load the value. 732 return Builder.CreateLoad(V, Name.c_str()); 733 } 734 735 Value *UnaryExprAST::codegen() { 736 Value *OperandV = Operand->codegen(); 737 if (!OperandV) 738 return nullptr; 739 740 Function *F = getFunction(std::string("unary") + Opcode); 741 if (!F) 742 return LogErrorV("Unknown unary operator"); 743 744 return Builder.CreateCall(F, OperandV, "unop"); 745 } 746 747 Value *BinaryExprAST::codegen() { 748 // Special case '=' because we don't want to emit the LHS as an expression. 749 if (Op == '=') { 750 // Assignment requires the LHS to be an identifier. 751 // This assume we're building without RTTI because LLVM builds that way by 752 // default. If you build LLVM with RTTI this can be changed to a 753 // dynamic_cast for automatic error checking. 754 VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); 755 if (!LHSE) 756 return LogErrorV("destination of '=' must be a variable"); 757 // Codegen the RHS. 758 Value *Val = RHS->codegen(); 759 if (!Val) 760 return nullptr; 761 762 // Look up the name. 763 Value *Variable = NamedValues[LHSE->getName()]; 764 if (!Variable) 765 return LogErrorV("Unknown variable name"); 766 767 Builder.CreateStore(Val, Variable); 768 return Val; 769 } 770 771 Value *L = LHS->codegen(); 772 Value *R = RHS->codegen(); 773 if (!L || !R) 774 return nullptr; 775 776 switch (Op) { 777 case '+': 778 return Builder.CreateFAdd(L, R, "addtmp"); 779 case '-': 780 return Builder.CreateFSub(L, R, "subtmp"); 781 case '*': 782 return Builder.CreateFMul(L, R, "multmp"); 783 case '<': 784 L = Builder.CreateFCmpULT(L, R, "cmptmp"); 785 // Convert bool 0/1 to double 0.0 or 1.0 786 return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); 787 default: 788 break; 789 } 790 791 // If it wasn't a builtin binary operator, it must be a user defined one. Emit 792 // a call to it. 793 Function *F = getFunction(std::string("binary") + Op); 794 assert(F && "binary operator not found!"); 795 796 Value *Ops[] = {L, R}; 797 return Builder.CreateCall(F, Ops, "binop"); 798 } 799 800 Value *CallExprAST::codegen() { 801 // Look up the name in the global module table. 802 Function *CalleeF = getFunction(Callee); 803 if (!CalleeF) 804 return LogErrorV("Unknown function referenced"); 805 806 // If argument mismatch error. 807 if (CalleeF->arg_size() != Args.size()) 808 return LogErrorV("Incorrect # arguments passed"); 809 810 std::vector<Value *> ArgsV; 811 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 812 ArgsV.push_back(Args[i]->codegen()); 813 if (!ArgsV.back()) 814 return nullptr; 815 } 816 817 return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); 818 } 819 820 Value *IfExprAST::codegen() { 821 Value *CondV = Cond->codegen(); 822 if (!CondV) 823 return nullptr; 824 825 // Convert condition to a bool by comparing equal to 0.0. 826 CondV = Builder.CreateFCmpONE( 827 CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); 828 829 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 830 831 // Create blocks for the then and else cases. Insert the 'then' block at the 832 // end of the function. 833 BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction); 834 BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else"); 835 BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont"); 836 837 Builder.CreateCondBr(CondV, ThenBB, ElseBB); 838 839 // Emit then value. 840 Builder.SetInsertPoint(ThenBB); 841 842 Value *ThenV = Then->codegen(); 843 if (!ThenV) 844 return nullptr; 845 846 Builder.CreateBr(MergeBB); 847 // Codegen of 'Then' can change the current block, update ThenBB for the PHI. 848 ThenBB = Builder.GetInsertBlock(); 849 850 // Emit else block. 851 TheFunction->getBasicBlockList().push_back(ElseBB); 852 Builder.SetInsertPoint(ElseBB); 853 854 Value *ElseV = Else->codegen(); 855 if (!ElseV) 856 return nullptr; 857 858 Builder.CreateBr(MergeBB); 859 // Codegen of 'Else' can change the current block, update ElseBB for the PHI. 860 ElseBB = Builder.GetInsertBlock(); 861 862 // Emit merge block. 863 TheFunction->getBasicBlockList().push_back(MergeBB); 864 Builder.SetInsertPoint(MergeBB); 865 PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp"); 866 867 PN->addIncoming(ThenV, ThenBB); 868 PN->addIncoming(ElseV, ElseBB); 869 return PN; 870 } 871 872 // Output for-loop as: 873 // var = alloca double 874 // ... 875 // start = startexpr 876 // store start -> var 877 // goto loop 878 // loop: 879 // ... 880 // bodyexpr 881 // ... 882 // loopend: 883 // step = stepexpr 884 // endcond = endexpr 885 // 886 // curvar = load var 887 // nextvar = curvar + step 888 // store nextvar -> var 889 // br endcond, loop, endloop 890 // outloop: 891 Value *ForExprAST::codegen() { 892 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 893 894 // Create an alloca for the variable in the entry block. 895 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 896 897 // Emit the start code first, without 'variable' in scope. 898 Value *StartVal = Start->codegen(); 899 if (!StartVal) 900 return nullptr; 901 902 // Store the value into the alloca. 903 Builder.CreateStore(StartVal, Alloca); 904 905 // Make the new basic block for the loop header, inserting after current 906 // block. 907 BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction); 908 909 // Insert an explicit fall through from the current block to the LoopBB. 910 Builder.CreateBr(LoopBB); 911 912 // Start insertion in LoopBB. 913 Builder.SetInsertPoint(LoopBB); 914 915 // Within the loop, the variable is defined equal to the PHI node. If it 916 // shadows an existing variable, we have to restore it, so save it now. 917 AllocaInst *OldVal = NamedValues[VarName]; 918 NamedValues[VarName] = Alloca; 919 920 // Emit the body of the loop. This, like any other expr, can change the 921 // current BB. Note that we ignore the value computed by the body, but don't 922 // allow an error. 923 if (!Body->codegen()) 924 return nullptr; 925 926 // Emit the step value. 927 Value *StepVal = nullptr; 928 if (Step) { 929 StepVal = Step->codegen(); 930 if (!StepVal) 931 return nullptr; 932 } else { 933 // If not specified, use 1.0. 934 StepVal = ConstantFP::get(TheContext, APFloat(1.0)); 935 } 936 937 // Compute the end condition. 938 Value *EndCond = End->codegen(); 939 if (!EndCond) 940 return nullptr; 941 942 // Reload, increment, and restore the alloca. This handles the case where 943 // the body of the loop mutates the variable. 944 Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); 945 Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); 946 Builder.CreateStore(NextVar, Alloca); 947 948 // Convert condition to a bool by comparing equal to 0.0. 949 EndCond = Builder.CreateFCmpONE( 950 EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); 951 952 // Create the "after loop" block and insert it. 953 BasicBlock *AfterBB = 954 BasicBlock::Create(TheContext, "afterloop", TheFunction); 955 956 // Insert the conditional branch into the end of LoopEndBB. 957 Builder.CreateCondBr(EndCond, LoopBB, AfterBB); 958 959 // Any new code will be inserted in AfterBB. 960 Builder.SetInsertPoint(AfterBB); 961 962 // Restore the unshadowed variable. 963 if (OldVal) 964 NamedValues[VarName] = OldVal; 965 else 966 NamedValues.erase(VarName); 967 968 // for expr always returns 0.0. 969 return Constant::getNullValue(Type::getDoubleTy(TheContext)); 970 } 971 972 Value *VarExprAST::codegen() { 973 std::vector<AllocaInst *> OldBindings; 974 975 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 976 977 // Register all variables and emit their initializer. 978 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { 979 const std::string &VarName = VarNames[i].first; 980 ExprAST *Init = VarNames[i].second.get(); 981 982 // Emit the initializer before adding the variable to scope, this prevents 983 // the initializer from referencing the variable itself, and permits stuff 984 // like this: 985 // var a = 1 in 986 // var a = a in ... # refers to outer 'a'. 987 Value *InitVal; 988 if (Init) { 989 InitVal = Init->codegen(); 990 if (!InitVal) 991 return nullptr; 992 } else { // If not specified, use 0.0. 993 InitVal = ConstantFP::get(TheContext, APFloat(0.0)); 994 } 995 996 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 997 Builder.CreateStore(InitVal, Alloca); 998 999 // Remember the old variable binding so that we can restore the binding when 1000 // we unrecurse. 1001 OldBindings.push_back(NamedValues[VarName]); 1002 1003 // Remember this binding. 1004 NamedValues[VarName] = Alloca; 1005 } 1006 1007 // Codegen the body, now that all vars are in scope. 1008 Value *BodyVal = Body->codegen(); 1009 if (!BodyVal) 1010 return nullptr; 1011 1012 // Pop all our variables from scope. 1013 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) 1014 NamedValues[VarNames[i].first] = OldBindings[i]; 1015 1016 // Return the body computation. 1017 return BodyVal; 1018 } 1019 1020 Function *PrototypeAST::codegen() { 1021 // Make the function type: double(double,double) etc. 1022 std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext)); 1023 FunctionType *FT = 1024 FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); 1025 1026 Function *F = 1027 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); 1028 1029 // Set names for all arguments. 1030 unsigned Idx = 0; 1031 for (auto &Arg : F->args()) 1032 Arg.setName(Args[Idx++]); 1033 1034 return F; 1035 } 1036 1037 const PrototypeAST& FunctionAST::getProto() const { 1038 return *Proto; 1039 } 1040 1041 const std::string& FunctionAST::getName() const { 1042 return Proto->getName(); 1043 } 1044 1045 Function *FunctionAST::codegen() { 1046 // Transfer ownership of the prototype to the FunctionProtos map, but keep a 1047 // reference to it for use below. 1048 auto &P = *Proto; 1049 Function *TheFunction = getFunction(P.getName()); 1050 if (!TheFunction) 1051 return nullptr; 1052 1053 // If this is an operator, install it. 1054 if (P.isBinaryOp()) 1055 BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); 1056 1057 // Create a new basic block to start insertion into. 1058 BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); 1059 Builder.SetInsertPoint(BB); 1060 1061 // Record the function arguments in the NamedValues map. 1062 NamedValues.clear(); 1063 for (auto &Arg : TheFunction->args()) { 1064 // Create an alloca for this variable. 1065 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); 1066 1067 // Store the initial value into the alloca. 1068 Builder.CreateStore(&Arg, Alloca); 1069 1070 // Add arguments to variable symbol table. 1071 NamedValues[Arg.getName()] = Alloca; 1072 } 1073 1074 if (Value *RetVal = Body->codegen()) { 1075 // Finish off the function. 1076 Builder.CreateRet(RetVal); 1077 1078 // Validate the generated code, checking for consistency. 1079 verifyFunction(*TheFunction); 1080 1081 return TheFunction; 1082 } 1083 1084 // Error reading body, remove function. 1085 TheFunction->eraseFromParent(); 1086 1087 if (P.isBinaryOp()) 1088 BinopPrecedence.erase(Proto->getOperatorName()); 1089 return nullptr; 1090 } 1091 1092 //===----------------------------------------------------------------------===// 1093 // Top-Level parsing and JIT Driver 1094 //===----------------------------------------------------------------------===// 1095 1096 static void InitializeModule() { 1097 // Open a new module. 1098 TheModule = std::make_unique<Module>("my cool jit", TheContext); 1099 TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); 1100 } 1101 1102 std::unique_ptr<llvm::Module> 1103 irgenAndTakeOwnership(FunctionAST &FnAST, const std::string &Suffix) { 1104 if (auto *F = FnAST.codegen()) { 1105 F->setName(F->getName() + Suffix); 1106 auto M = std::move(TheModule); 1107 // Start a new module. 1108 InitializeModule(); 1109 return M; 1110 } else 1111 report_fatal_error("Couldn't compile lazily JIT'd function"); 1112 } 1113 1114 static void HandleDefinition() { 1115 if (auto FnAST = ParseDefinition()) { 1116 FunctionProtos[FnAST->getProto().getName()] = 1117 std::make_unique<PrototypeAST>(FnAST->getProto()); 1118 ExitOnErr(TheJIT->addFunctionAST(std::move(FnAST))); 1119 } else { 1120 // Skip token for error recovery. 1121 getNextToken(); 1122 } 1123 } 1124 1125 static void HandleExtern() { 1126 if (auto ProtoAST = ParseExtern()) { 1127 if (auto *FnIR = ProtoAST->codegen()) { 1128 fprintf(stderr, "Read extern: "); 1129 FnIR->print(errs()); 1130 fprintf(stderr, "\n"); 1131 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); 1132 } 1133 } else { 1134 // Skip token for error recovery. 1135 getNextToken(); 1136 } 1137 } 1138 1139 static void HandleTopLevelExpression() { 1140 // Evaluate a top-level expression into an anonymous function. 1141 if (auto FnAST = ParseTopLevelExpr()) { 1142 FunctionProtos[FnAST->getName()] = 1143 std::make_unique<PrototypeAST>(FnAST->getProto()); 1144 if (FnAST->codegen()) { 1145 // JIT the module containing the anonymous expression, keeping a handle so 1146 // we can free it later. 1147 auto H = TheJIT->addModule(std::move(TheModule)); 1148 InitializeModule(); 1149 1150 // Search the JIT for the __anon_expr symbol. 1151 auto ExprSymbol = TheJIT->findSymbol("__anon_expr"); 1152 assert(ExprSymbol && "Function not found"); 1153 1154 // Get the symbol's address and cast it to the right type (takes no 1155 // arguments, returns a double) so we can call it as a native function. 1156 double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); 1157 fprintf(stderr, "Evaluated to %f\n", FP()); 1158 1159 // Delete the anonymous expression module from the JIT. 1160 TheJIT->removeModule(H); 1161 } 1162 } else { 1163 // Skip token for error recovery. 1164 getNextToken(); 1165 } 1166 } 1167 1168 /// top ::= definition | external | expression | ';' 1169 static void MainLoop() { 1170 while (true) { 1171 fprintf(stderr, "ready> "); 1172 switch (CurTok) { 1173 case tok_eof: 1174 return; 1175 case ';': // ignore top-level semicolons. 1176 getNextToken(); 1177 break; 1178 case tok_def: 1179 HandleDefinition(); 1180 break; 1181 case tok_extern: 1182 HandleExtern(); 1183 break; 1184 default: 1185 HandleTopLevelExpression(); 1186 break; 1187 } 1188 } 1189 } 1190 1191 //===----------------------------------------------------------------------===// 1192 // "Library" functions that can be "extern'd" from user code. 1193 //===----------------------------------------------------------------------===// 1194 1195 /// putchard - putchar that takes a double and returns 0. 1196 extern "C" double putchard(double X) { 1197 fputc((char)X, stderr); 1198 return 0; 1199 } 1200 1201 /// printd - printf that takes a double prints it as "%f\n", returning 0. 1202 extern "C" double printd(double X) { 1203 fprintf(stderr, "%f\n", X); 1204 return 0; 1205 } 1206 1207 //===----------------------------------------------------------------------===// 1208 // Main driver code. 1209 //===----------------------------------------------------------------------===// 1210 1211 int main() { 1212 InitializeNativeTarget(); 1213 InitializeNativeTargetAsmPrinter(); 1214 InitializeNativeTargetAsmParser(); 1215 1216 ExitOnErr.setBanner("Kaleidoscope: "); 1217 1218 // Install standard binary operators. 1219 // 1 is lowest precedence. 1220 BinopPrecedence['='] = 2; 1221 BinopPrecedence['<'] = 10; 1222 BinopPrecedence['+'] = 20; 1223 BinopPrecedence['-'] = 20; 1224 BinopPrecedence['*'] = 40; // highest. 1225 1226 // Prime the first token. 1227 fprintf(stderr, "ready> "); 1228 getNextToken(); 1229 1230 TheJIT = std::make_unique<KaleidoscopeJIT>(); 1231 1232 InitializeModule(); 1233 1234 // Run the main "interpreter loop" now. 1235 MainLoop(); 1236 1237 return 0; 1238 } 1239