1 #include "llvm/ADT/APFloat.h" 2 #include "llvm/ADT/STLExtras.h" 3 #include "llvm/IR/BasicBlock.h" 4 #include "llvm/IR/Constants.h" 5 #include "llvm/IR/DerivedTypes.h" 6 #include "llvm/IR/Function.h" 7 #include "llvm/IR/Instructions.h" 8 #include "llvm/IR/IRBuilder.h" 9 #include "llvm/IR/LLVMContext.h" 10 #include "llvm/IR/Module.h" 11 #include "llvm/IR/Type.h" 12 #include "llvm/IR/Verifier.h" 13 #include "llvm/Support/TargetSelect.h" 14 #include "llvm/Target/TargetMachine.h" 15 #include "KaleidoscopeJIT.h" 16 #include <algorithm> 17 #include <cassert> 18 #include <cctype> 19 #include <cstdint> 20 #include <cstdio> 21 #include <cstdlib> 22 #include <map> 23 #include <memory> 24 #include <string> 25 #include <utility> 26 #include <vector> 27 28 using namespace llvm; 29 using namespace llvm::orc; 30 31 //===----------------------------------------------------------------------===// 32 // Lexer 33 //===----------------------------------------------------------------------===// 34 35 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 36 // of these for known things. 37 enum Token { 38 tok_eof = -1, 39 40 // commands 41 tok_def = -2, 42 tok_extern = -3, 43 44 // primary 45 tok_identifier = -4, 46 tok_number = -5, 47 48 // control 49 tok_if = -6, 50 tok_then = -7, 51 tok_else = -8, 52 tok_for = -9, 53 tok_in = -10, 54 55 // operators 56 tok_binary = -11, 57 tok_unary = -12, 58 59 // var definition 60 tok_var = -13 61 }; 62 63 static std::string IdentifierStr; // Filled in if tok_identifier 64 static double NumVal; // Filled in if tok_number 65 66 /// gettok - Return the next token from standard input. 67 static int gettok() { 68 static int LastChar = ' '; 69 70 // Skip any whitespace. 71 while (isspace(LastChar)) 72 LastChar = getchar(); 73 74 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 75 IdentifierStr = LastChar; 76 while (isalnum((LastChar = getchar()))) 77 IdentifierStr += LastChar; 78 79 if (IdentifierStr == "def") 80 return tok_def; 81 if (IdentifierStr == "extern") 82 return tok_extern; 83 if (IdentifierStr == "if") 84 return tok_if; 85 if (IdentifierStr == "then") 86 return tok_then; 87 if (IdentifierStr == "else") 88 return tok_else; 89 if (IdentifierStr == "for") 90 return tok_for; 91 if (IdentifierStr == "in") 92 return tok_in; 93 if (IdentifierStr == "binary") 94 return tok_binary; 95 if (IdentifierStr == "unary") 96 return tok_unary; 97 if (IdentifierStr == "var") 98 return tok_var; 99 return tok_identifier; 100 } 101 102 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 103 std::string NumStr; 104 do { 105 NumStr += LastChar; 106 LastChar = getchar(); 107 } while (isdigit(LastChar) || LastChar == '.'); 108 109 NumVal = strtod(NumStr.c_str(), nullptr); 110 return tok_number; 111 } 112 113 if (LastChar == '#') { 114 // Comment until end of line. 115 do 116 LastChar = getchar(); 117 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 118 119 if (LastChar != EOF) 120 return gettok(); 121 } 122 123 // Check for end of file. Don't eat the EOF. 124 if (LastChar == EOF) 125 return tok_eof; 126 127 // Otherwise, just return the character as its ascii value. 128 int ThisChar = LastChar; 129 LastChar = getchar(); 130 return ThisChar; 131 } 132 133 //===----------------------------------------------------------------------===// 134 // Abstract Syntax Tree (aka Parse Tree) 135 //===----------------------------------------------------------------------===// 136 137 namespace { 138 139 /// ExprAST - Base class for all expression nodes. 140 class ExprAST { 141 public: 142 virtual ~ExprAST() = default; 143 144 virtual Value *codegen() = 0; 145 }; 146 147 /// NumberExprAST - Expression class for numeric literals like "1.0". 148 class NumberExprAST : public ExprAST { 149 double Val; 150 151 public: 152 NumberExprAST(double Val) : Val(Val) {} 153 154 Value *codegen() override; 155 }; 156 157 /// VariableExprAST - Expression class for referencing a variable, like "a". 158 class VariableExprAST : public ExprAST { 159 std::string Name; 160 161 public: 162 VariableExprAST(const std::string &Name) : Name(Name) {} 163 164 Value *codegen() override; 165 const std::string &getName() const { return Name; } 166 }; 167 168 /// UnaryExprAST - Expression class for a unary operator. 169 class UnaryExprAST : public ExprAST { 170 char Opcode; 171 std::unique_ptr<ExprAST> Operand; 172 173 public: 174 UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) 175 : Opcode(Opcode), Operand(std::move(Operand)) {} 176 177 Value *codegen() override; 178 }; 179 180 /// BinaryExprAST - Expression class for a binary operator. 181 class BinaryExprAST : public ExprAST { 182 char Op; 183 std::unique_ptr<ExprAST> LHS, RHS; 184 185 public: 186 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 187 std::unique_ptr<ExprAST> RHS) 188 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 189 190 Value *codegen() override; 191 }; 192 193 /// CallExprAST - Expression class for function calls. 194 class CallExprAST : public ExprAST { 195 std::string Callee; 196 std::vector<std::unique_ptr<ExprAST>> Args; 197 198 public: 199 CallExprAST(const std::string &Callee, 200 std::vector<std::unique_ptr<ExprAST>> Args) 201 : Callee(Callee), Args(std::move(Args)) {} 202 203 Value *codegen() override; 204 }; 205 206 /// IfExprAST - Expression class for if/then/else. 207 class IfExprAST : public ExprAST { 208 std::unique_ptr<ExprAST> Cond, Then, Else; 209 210 public: 211 IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, 212 std::unique_ptr<ExprAST> Else) 213 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} 214 215 Value *codegen() override; 216 }; 217 218 /// ForExprAST - Expression class for for/in. 219 class ForExprAST : public ExprAST { 220 std::string VarName; 221 std::unique_ptr<ExprAST> Start, End, Step, Body; 222 223 public: 224 ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, 225 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, 226 std::unique_ptr<ExprAST> Body) 227 : VarName(VarName), Start(std::move(Start)), End(std::move(End)), 228 Step(std::move(Step)), Body(std::move(Body)) {} 229 230 Value *codegen() override; 231 }; 232 233 /// VarExprAST - Expression class for var/in 234 class VarExprAST : public ExprAST { 235 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 236 std::unique_ptr<ExprAST> Body; 237 238 public: 239 VarExprAST( 240 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, 241 std::unique_ptr<ExprAST> Body) 242 : VarNames(std::move(VarNames)), Body(std::move(Body)) {} 243 244 Value *codegen() override; 245 }; 246 247 /// PrototypeAST - This class represents the "prototype" for a function, 248 /// which captures its name, and its argument names (thus implicitly the number 249 /// of arguments the function takes), as well as if it is an operator. 250 class PrototypeAST { 251 std::string Name; 252 std::vector<std::string> Args; 253 bool IsOperator; 254 unsigned Precedence; // Precedence if a binary op. 255 256 public: 257 PrototypeAST(const std::string &Name, std::vector<std::string> Args, 258 bool IsOperator = false, unsigned Prec = 0) 259 : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), 260 Precedence(Prec) {} 261 262 Function *codegen(); 263 const std::string &getName() const { return Name; } 264 265 bool isUnaryOp() const { return IsOperator && Args.size() == 1; } 266 bool isBinaryOp() const { return IsOperator && Args.size() == 2; } 267 268 char getOperatorName() const { 269 assert(isUnaryOp() || isBinaryOp()); 270 return Name[Name.size() - 1]; 271 } 272 273 unsigned getBinaryPrecedence() const { return Precedence; } 274 }; 275 276 /// FunctionAST - This class represents a function definition itself. 277 class FunctionAST { 278 std::unique_ptr<PrototypeAST> Proto; 279 std::unique_ptr<ExprAST> Body; 280 281 public: 282 FunctionAST(std::unique_ptr<PrototypeAST> Proto, 283 std::unique_ptr<ExprAST> Body) 284 : Proto(std::move(Proto)), Body(std::move(Body)) {} 285 286 Function *codegen(); 287 }; 288 289 } // end anonymous namespace 290 291 //===----------------------------------------------------------------------===// 292 // Parser 293 //===----------------------------------------------------------------------===// 294 295 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 296 /// token the parser is looking at. getNextToken reads another token from the 297 /// lexer and updates CurTok with its results. 298 static int CurTok; 299 static int getNextToken() { return CurTok = gettok(); } 300 301 /// BinopPrecedence - This holds the precedence for each binary operator that is 302 /// defined. 303 static std::map<char, int> BinopPrecedence; 304 305 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 306 static int GetTokPrecedence() { 307 if (!isascii(CurTok)) 308 return -1; 309 310 // Make sure it's a declared binop. 311 int TokPrec = BinopPrecedence[CurTok]; 312 if (TokPrec <= 0) 313 return -1; 314 return TokPrec; 315 } 316 317 /// LogError* - These are little helper functions for error handling. 318 std::unique_ptr<ExprAST> LogError(const char *Str) { 319 fprintf(stderr, "Error: %s\n", Str); 320 return nullptr; 321 } 322 323 std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { 324 LogError(Str); 325 return nullptr; 326 } 327 328 static std::unique_ptr<ExprAST> ParseExpression(); 329 330 /// numberexpr ::= number 331 static std::unique_ptr<ExprAST> ParseNumberExpr() { 332 auto Result = llvm::make_unique<NumberExprAST>(NumVal); 333 getNextToken(); // consume the number 334 return std::move(Result); 335 } 336 337 /// parenexpr ::= '(' expression ')' 338 static std::unique_ptr<ExprAST> ParseParenExpr() { 339 getNextToken(); // eat (. 340 auto V = ParseExpression(); 341 if (!V) 342 return nullptr; 343 344 if (CurTok != ')') 345 return LogError("expected ')'"); 346 getNextToken(); // eat ). 347 return V; 348 } 349 350 /// identifierexpr 351 /// ::= identifier 352 /// ::= identifier '(' expression* ')' 353 static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 354 std::string IdName = IdentifierStr; 355 356 getNextToken(); // eat identifier. 357 358 if (CurTok != '(') // Simple variable ref. 359 return llvm::make_unique<VariableExprAST>(IdName); 360 361 // Call. 362 getNextToken(); // eat ( 363 std::vector<std::unique_ptr<ExprAST>> Args; 364 if (CurTok != ')') { 365 while (true) { 366 if (auto Arg = ParseExpression()) 367 Args.push_back(std::move(Arg)); 368 else 369 return nullptr; 370 371 if (CurTok == ')') 372 break; 373 374 if (CurTok != ',') 375 return LogError("Expected ')' or ',' in argument list"); 376 getNextToken(); 377 } 378 } 379 380 // Eat the ')'. 381 getNextToken(); 382 383 return llvm::make_unique<CallExprAST>(IdName, std::move(Args)); 384 } 385 386 /// ifexpr ::= 'if' expression 'then' expression 'else' expression 387 static std::unique_ptr<ExprAST> ParseIfExpr() { 388 getNextToken(); // eat the if. 389 390 // condition. 391 auto Cond = ParseExpression(); 392 if (!Cond) 393 return nullptr; 394 395 if (CurTok != tok_then) 396 return LogError("expected then"); 397 getNextToken(); // eat the then 398 399 auto Then = ParseExpression(); 400 if (!Then) 401 return nullptr; 402 403 if (CurTok != tok_else) 404 return LogError("expected else"); 405 406 getNextToken(); 407 408 auto Else = ParseExpression(); 409 if (!Else) 410 return nullptr; 411 412 return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then), 413 std::move(Else)); 414 } 415 416 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression 417 static std::unique_ptr<ExprAST> ParseForExpr() { 418 getNextToken(); // eat the for. 419 420 if (CurTok != tok_identifier) 421 return LogError("expected identifier after for"); 422 423 std::string IdName = IdentifierStr; 424 getNextToken(); // eat identifier. 425 426 if (CurTok != '=') 427 return LogError("expected '=' after for"); 428 getNextToken(); // eat '='. 429 430 auto Start = ParseExpression(); 431 if (!Start) 432 return nullptr; 433 if (CurTok != ',') 434 return LogError("expected ',' after for start value"); 435 getNextToken(); 436 437 auto End = ParseExpression(); 438 if (!End) 439 return nullptr; 440 441 // The step value is optional. 442 std::unique_ptr<ExprAST> Step; 443 if (CurTok == ',') { 444 getNextToken(); 445 Step = ParseExpression(); 446 if (!Step) 447 return nullptr; 448 } 449 450 if (CurTok != tok_in) 451 return LogError("expected 'in' after for"); 452 getNextToken(); // eat 'in'. 453 454 auto Body = ParseExpression(); 455 if (!Body) 456 return nullptr; 457 458 return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), 459 std::move(Step), std::move(Body)); 460 } 461 462 /// varexpr ::= 'var' identifier ('=' expression)? 463 // (',' identifier ('=' expression)?)* 'in' expression 464 static std::unique_ptr<ExprAST> ParseVarExpr() { 465 getNextToken(); // eat the var. 466 467 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 468 469 // At least one variable name is required. 470 if (CurTok != tok_identifier) 471 return LogError("expected identifier after var"); 472 473 while (true) { 474 std::string Name = IdentifierStr; 475 getNextToken(); // eat identifier. 476 477 // Read the optional initializer. 478 std::unique_ptr<ExprAST> Init = nullptr; 479 if (CurTok == '=') { 480 getNextToken(); // eat the '='. 481 482 Init = ParseExpression(); 483 if (!Init) 484 return nullptr; 485 } 486 487 VarNames.push_back(std::make_pair(Name, std::move(Init))); 488 489 // End of var list, exit loop. 490 if (CurTok != ',') 491 break; 492 getNextToken(); // eat the ','. 493 494 if (CurTok != tok_identifier) 495 return LogError("expected identifier list after var"); 496 } 497 498 // At this point, we have to have 'in'. 499 if (CurTok != tok_in) 500 return LogError("expected 'in' keyword after 'var'"); 501 getNextToken(); // eat 'in'. 502 503 auto Body = ParseExpression(); 504 if (!Body) 505 return nullptr; 506 507 return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); 508 } 509 510 /// primary 511 /// ::= identifierexpr 512 /// ::= numberexpr 513 /// ::= parenexpr 514 /// ::= ifexpr 515 /// ::= forexpr 516 /// ::= varexpr 517 static std::unique_ptr<ExprAST> ParsePrimary() { 518 switch (CurTok) { 519 default: 520 return LogError("unknown token when expecting an expression"); 521 case tok_identifier: 522 return ParseIdentifierExpr(); 523 case tok_number: 524 return ParseNumberExpr(); 525 case '(': 526 return ParseParenExpr(); 527 case tok_if: 528 return ParseIfExpr(); 529 case tok_for: 530 return ParseForExpr(); 531 case tok_var: 532 return ParseVarExpr(); 533 } 534 } 535 536 /// unary 537 /// ::= primary 538 /// ::= '!' unary 539 static std::unique_ptr<ExprAST> ParseUnary() { 540 // If the current token is not an operator, it must be a primary expr. 541 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') 542 return ParsePrimary(); 543 544 // If this is a unary operator, read it. 545 int Opc = CurTok; 546 getNextToken(); 547 if (auto Operand = ParseUnary()) 548 return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand)); 549 return nullptr; 550 } 551 552 /// binoprhs 553 /// ::= ('+' unary)* 554 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 555 std::unique_ptr<ExprAST> LHS) { 556 // If this is a binop, find its precedence. 557 while (true) { 558 int TokPrec = GetTokPrecedence(); 559 560 // If this is a binop that binds at least as tightly as the current binop, 561 // consume it, otherwise we are done. 562 if (TokPrec < ExprPrec) 563 return LHS; 564 565 // Okay, we know this is a binop. 566 int BinOp = CurTok; 567 getNextToken(); // eat binop 568 569 // Parse the unary expression after the binary operator. 570 auto RHS = ParseUnary(); 571 if (!RHS) 572 return nullptr; 573 574 // If BinOp binds less tightly with RHS than the operator after RHS, let 575 // the pending operator take RHS as its LHS. 576 int NextPrec = GetTokPrecedence(); 577 if (TokPrec < NextPrec) { 578 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); 579 if (!RHS) 580 return nullptr; 581 } 582 583 // Merge LHS/RHS. 584 LHS = 585 llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); 586 } 587 } 588 589 /// expression 590 /// ::= unary binoprhs 591 /// 592 static std::unique_ptr<ExprAST> ParseExpression() { 593 auto LHS = ParseUnary(); 594 if (!LHS) 595 return nullptr; 596 597 return ParseBinOpRHS(0, std::move(LHS)); 598 } 599 600 /// prototype 601 /// ::= id '(' id* ')' 602 /// ::= binary LETTER number? (id, id) 603 /// ::= unary LETTER (id) 604 static std::unique_ptr<PrototypeAST> ParsePrototype() { 605 std::string FnName; 606 607 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 608 unsigned BinaryPrecedence = 30; 609 610 switch (CurTok) { 611 default: 612 return LogErrorP("Expected function name in prototype"); 613 case tok_identifier: 614 FnName = IdentifierStr; 615 Kind = 0; 616 getNextToken(); 617 break; 618 case tok_unary: 619 getNextToken(); 620 if (!isascii(CurTok)) 621 return LogErrorP("Expected unary operator"); 622 FnName = "unary"; 623 FnName += (char)CurTok; 624 Kind = 1; 625 getNextToken(); 626 break; 627 case tok_binary: 628 getNextToken(); 629 if (!isascii(CurTok)) 630 return LogErrorP("Expected binary operator"); 631 FnName = "binary"; 632 FnName += (char)CurTok; 633 Kind = 2; 634 getNextToken(); 635 636 // Read the precedence if present. 637 if (CurTok == tok_number) { 638 if (NumVal < 1 || NumVal > 100) 639 return LogErrorP("Invalid precedecnce: must be 1..100"); 640 BinaryPrecedence = (unsigned)NumVal; 641 getNextToken(); 642 } 643 break; 644 } 645 646 if (CurTok != '(') 647 return LogErrorP("Expected '(' in prototype"); 648 649 std::vector<std::string> ArgNames; 650 while (getNextToken() == tok_identifier) 651 ArgNames.push_back(IdentifierStr); 652 if (CurTok != ')') 653 return LogErrorP("Expected ')' in prototype"); 654 655 // success. 656 getNextToken(); // eat ')'. 657 658 // Verify right number of names for operator. 659 if (Kind && ArgNames.size() != Kind) 660 return LogErrorP("Invalid number of operands for operator"); 661 662 return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0, 663 BinaryPrecedence); 664 } 665 666 /// definition ::= 'def' prototype expression 667 static std::unique_ptr<FunctionAST> ParseDefinition() { 668 getNextToken(); // eat def. 669 auto Proto = ParsePrototype(); 670 if (!Proto) 671 return nullptr; 672 673 if (auto E = ParseExpression()) 674 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 675 return nullptr; 676 } 677 678 /// toplevelexpr ::= expression 679 static std::unique_ptr<FunctionAST> ParseTopLevelExpr(unsigned ExprCount) { 680 if (auto E = ParseExpression()) { 681 // Make an anonymous proto. 682 auto Proto = llvm::make_unique<PrototypeAST>(("__anon_expr" + 683 Twine(ExprCount)).str(), 684 std::vector<std::string>()); 685 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 686 } 687 return nullptr; 688 } 689 690 /// external ::= 'extern' prototype 691 static std::unique_ptr<PrototypeAST> ParseExtern() { 692 getNextToken(); // eat extern. 693 return ParsePrototype(); 694 } 695 696 //===----------------------------------------------------------------------===// 697 // Code Generation 698 //===----------------------------------------------------------------------===// 699 700 static std::unique_ptr<KaleidoscopeJIT> TheJIT; 701 static LLVMContext *TheContext; 702 static std::unique_ptr<IRBuilder<>> Builder; 703 static std::unique_ptr<Module> TheModule; 704 static std::map<std::string, AllocaInst *> NamedValues; 705 static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; 706 static ExitOnError ExitOnErr; 707 708 Value *LogErrorV(const char *Str) { 709 LogError(Str); 710 return nullptr; 711 } 712 713 Function *getFunction(std::string Name) { 714 // First, see if the function has already been added to the current module. 715 if (auto *F = TheModule->getFunction(Name)) 716 return F; 717 718 // If not, check whether we can codegen the declaration from some existing 719 // prototype. 720 auto FI = FunctionProtos.find(Name); 721 if (FI != FunctionProtos.end()) 722 return FI->second->codegen(); 723 724 // If no existing prototype exists, return null. 725 return nullptr; 726 } 727 728 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of 729 /// the function. This is used for mutable variables etc. 730 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, 731 const std::string &VarName) { 732 IRBuilder<> TmpB(&TheFunction->getEntryBlock(), 733 TheFunction->getEntryBlock().begin()); 734 return TmpB.CreateAlloca(Type::getDoubleTy(*TheContext), nullptr, VarName); 735 } 736 737 Value *NumberExprAST::codegen() { 738 return ConstantFP::get(*TheContext, APFloat(Val)); 739 } 740 741 Value *VariableExprAST::codegen() { 742 // Look this variable up in the function. 743 Value *V = NamedValues[Name]; 744 if (!V) 745 return LogErrorV("Unknown variable name"); 746 747 // Load the value. 748 return Builder->CreateLoad(V, Name.c_str()); 749 } 750 751 Value *UnaryExprAST::codegen() { 752 Value *OperandV = Operand->codegen(); 753 if (!OperandV) 754 return nullptr; 755 756 Function *F = getFunction(std::string("unary") + Opcode); 757 if (!F) 758 return LogErrorV("Unknown unary operator"); 759 760 return Builder->CreateCall(F, OperandV, "unop"); 761 } 762 763 Value *BinaryExprAST::codegen() { 764 // Special case '=' because we don't want to emit the LHS as an expression. 765 if (Op == '=') { 766 // Assignment requires the LHS to be an identifier. 767 // This assume we're building without RTTI because LLVM builds that way by 768 // default. If you build LLVM with RTTI this can be changed to a 769 // dynamic_cast for automatic error checking. 770 VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); 771 if (!LHSE) 772 return LogErrorV("destination of '=' must be a variable"); 773 // Codegen the RHS. 774 Value *Val = RHS->codegen(); 775 if (!Val) 776 return nullptr; 777 778 // Look up the name. 779 Value *Variable = NamedValues[LHSE->getName()]; 780 if (!Variable) 781 return LogErrorV("Unknown variable name"); 782 783 Builder->CreateStore(Val, Variable); 784 return Val; 785 } 786 787 Value *L = LHS->codegen(); 788 Value *R = RHS->codegen(); 789 if (!L || !R) 790 return nullptr; 791 792 switch (Op) { 793 case '+': 794 return Builder->CreateFAdd(L, R, "addtmp"); 795 case '-': 796 return Builder->CreateFSub(L, R, "subtmp"); 797 case '*': 798 return Builder->CreateFMul(L, R, "multmp"); 799 case '<': 800 L = Builder->CreateFCmpULT(L, R, "cmptmp"); 801 // Convert bool 0/1 to double 0.0 or 1.0 802 return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp"); 803 default: 804 break; 805 } 806 807 // If it wasn't a builtin binary operator, it must be a user defined one. Emit 808 // a call to it. 809 Function *F = getFunction(std::string("binary") + Op); 810 assert(F && "binary operator not found!"); 811 812 Value *Ops[] = {L, R}; 813 return Builder->CreateCall(F, Ops, "binop"); 814 } 815 816 Value *CallExprAST::codegen() { 817 // Look up the name in the global module table. 818 Function *CalleeF = getFunction(Callee); 819 if (!CalleeF) 820 return LogErrorV("Unknown function referenced"); 821 822 // If argument mismatch error. 823 if (CalleeF->arg_size() != Args.size()) 824 return LogErrorV("Incorrect # arguments passed"); 825 826 std::vector<Value *> ArgsV; 827 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 828 ArgsV.push_back(Args[i]->codegen()); 829 if (!ArgsV.back()) 830 return nullptr; 831 } 832 833 return Builder->CreateCall(CalleeF, ArgsV, "calltmp"); 834 } 835 836 Value *IfExprAST::codegen() { 837 Value *CondV = Cond->codegen(); 838 if (!CondV) 839 return nullptr; 840 841 // Convert condition to a bool by comparing equal to 0.0. 842 CondV = Builder->CreateFCmpONE( 843 CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond"); 844 845 Function *TheFunction = Builder->GetInsertBlock()->getParent(); 846 847 // Create blocks for the then and else cases. Insert the 'then' block at the 848 // end of the function. 849 BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction); 850 BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else"); 851 BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont"); 852 853 Builder->CreateCondBr(CondV, ThenBB, ElseBB); 854 855 // Emit then value. 856 Builder->SetInsertPoint(ThenBB); 857 858 Value *ThenV = Then->codegen(); 859 if (!ThenV) 860 return nullptr; 861 862 Builder->CreateBr(MergeBB); 863 // Codegen of 'Then' can change the current block, update ThenBB for the PHI. 864 ThenBB = Builder->GetInsertBlock(); 865 866 // Emit else block. 867 TheFunction->getBasicBlockList().push_back(ElseBB); 868 Builder->SetInsertPoint(ElseBB); 869 870 Value *ElseV = Else->codegen(); 871 if (!ElseV) 872 return nullptr; 873 874 Builder->CreateBr(MergeBB); 875 // Codegen of 'Else' can change the current block, update ElseBB for the PHI. 876 ElseBB = Builder->GetInsertBlock(); 877 878 // Emit merge block. 879 TheFunction->getBasicBlockList().push_back(MergeBB); 880 Builder->SetInsertPoint(MergeBB); 881 PHINode *PN = Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, "iftmp"); 882 883 PN->addIncoming(ThenV, ThenBB); 884 PN->addIncoming(ElseV, ElseBB); 885 return PN; 886 } 887 888 // Output for-loop as: 889 // var = alloca double 890 // ... 891 // start = startexpr 892 // store start -> var 893 // goto loop 894 // loop: 895 // ... 896 // bodyexpr 897 // ... 898 // loopend: 899 // step = stepexpr 900 // endcond = endexpr 901 // 902 // curvar = load var 903 // nextvar = curvar + step 904 // store nextvar -> var 905 // br endcond, loop, endloop 906 // outloop: 907 Value *ForExprAST::codegen() { 908 Function *TheFunction = Builder->GetInsertBlock()->getParent(); 909 910 // Create an alloca for the variable in the entry block. 911 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 912 913 // Emit the start code first, without 'variable' in scope. 914 Value *StartVal = Start->codegen(); 915 if (!StartVal) 916 return nullptr; 917 918 // Store the value into the alloca. 919 Builder->CreateStore(StartVal, Alloca); 920 921 // Make the new basic block for the loop header, inserting after current 922 // block. 923 BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction); 924 925 // Insert an explicit fall through from the current block to the LoopBB. 926 Builder->CreateBr(LoopBB); 927 928 // Start insertion in LoopBB. 929 Builder->SetInsertPoint(LoopBB); 930 931 // Within the loop, the variable is defined equal to the PHI node. If it 932 // shadows an existing variable, we have to restore it, so save it now. 933 AllocaInst *OldVal = NamedValues[VarName]; 934 NamedValues[VarName] = Alloca; 935 936 // Emit the body of the loop. This, like any other expr, can change the 937 // current BB. Note that we ignore the value computed by the body, but don't 938 // allow an error. 939 if (!Body->codegen()) 940 return nullptr; 941 942 // Emit the step value. 943 Value *StepVal = nullptr; 944 if (Step) { 945 StepVal = Step->codegen(); 946 if (!StepVal) 947 return nullptr; 948 } else { 949 // If not specified, use 1.0. 950 StepVal = ConstantFP::get(*TheContext, APFloat(1.0)); 951 } 952 953 // Compute the end condition. 954 Value *EndCond = End->codegen(); 955 if (!EndCond) 956 return nullptr; 957 958 // Reload, increment, and restore the alloca. This handles the case where 959 // the body of the loop mutates the variable. 960 Value *CurVar = Builder->CreateLoad(Alloca, VarName.c_str()); 961 Value *NextVar = Builder->CreateFAdd(CurVar, StepVal, "nextvar"); 962 Builder->CreateStore(NextVar, Alloca); 963 964 // Convert condition to a bool by comparing equal to 0.0. 965 EndCond = Builder->CreateFCmpONE( 966 EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond"); 967 968 // Create the "after loop" block and insert it. 969 BasicBlock *AfterBB = 970 BasicBlock::Create(*TheContext, "afterloop", TheFunction); 971 972 // Insert the conditional branch into the end of LoopEndBB. 973 Builder->CreateCondBr(EndCond, LoopBB, AfterBB); 974 975 // Any new code will be inserted in AfterBB. 976 Builder->SetInsertPoint(AfterBB); 977 978 // Restore the unshadowed variable. 979 if (OldVal) 980 NamedValues[VarName] = OldVal; 981 else 982 NamedValues.erase(VarName); 983 984 // for expr always returns 0.0. 985 return Constant::getNullValue(Type::getDoubleTy(*TheContext)); 986 } 987 988 Value *VarExprAST::codegen() { 989 std::vector<AllocaInst *> OldBindings; 990 991 Function *TheFunction = Builder->GetInsertBlock()->getParent(); 992 993 // Register all variables and emit their initializer. 994 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { 995 const std::string &VarName = VarNames[i].first; 996 ExprAST *Init = VarNames[i].second.get(); 997 998 // Emit the initializer before adding the variable to scope, this prevents 999 // the initializer from referencing the variable itself, and permits stuff 1000 // like this: 1001 // var a = 1 in 1002 // var a = a in ... # refers to outer 'a'. 1003 Value *InitVal; 1004 if (Init) { 1005 InitVal = Init->codegen(); 1006 if (!InitVal) 1007 return nullptr; 1008 } else { // If not specified, use 0.0. 1009 InitVal = ConstantFP::get(*TheContext, APFloat(0.0)); 1010 } 1011 1012 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 1013 Builder->CreateStore(InitVal, Alloca); 1014 1015 // Remember the old variable binding so that we can restore the binding when 1016 // we unrecurse. 1017 OldBindings.push_back(NamedValues[VarName]); 1018 1019 // Remember this binding. 1020 NamedValues[VarName] = Alloca; 1021 } 1022 1023 // Codegen the body, now that all vars are in scope. 1024 Value *BodyVal = Body->codegen(); 1025 if (!BodyVal) 1026 return nullptr; 1027 1028 // Pop all our variables from scope. 1029 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) 1030 NamedValues[VarNames[i].first] = OldBindings[i]; 1031 1032 // Return the body computation. 1033 return BodyVal; 1034 } 1035 1036 Function *PrototypeAST::codegen() { 1037 // Make the function type: double(double,double) etc. 1038 std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(*TheContext)); 1039 FunctionType *FT = 1040 FunctionType::get(Type::getDoubleTy(*TheContext), Doubles, false); 1041 1042 Function *F = 1043 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); 1044 1045 // Set names for all arguments. 1046 unsigned Idx = 0; 1047 for (auto &Arg : F->args()) 1048 Arg.setName(Args[Idx++]); 1049 1050 return F; 1051 } 1052 1053 Function *FunctionAST::codegen() { 1054 // Transfer ownership of the prototype to the FunctionProtos map, but keep a 1055 // reference to it for use below. 1056 auto &P = *Proto; 1057 FunctionProtos[Proto->getName()] = std::move(Proto); 1058 Function *TheFunction = getFunction(P.getName()); 1059 if (!TheFunction) 1060 return nullptr; 1061 1062 // If this is an operator, install it. 1063 if (P.isBinaryOp()) 1064 BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); 1065 1066 // Create a new basic block to start insertion into. 1067 BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction); 1068 Builder->SetInsertPoint(BB); 1069 1070 // Record the function arguments in the NamedValues map. 1071 NamedValues.clear(); 1072 for (auto &Arg : TheFunction->args()) { 1073 // Create an alloca for this variable. 1074 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); 1075 1076 // Store the initial value into the alloca. 1077 Builder->CreateStore(&Arg, Alloca); 1078 1079 // Add arguments to variable symbol table. 1080 NamedValues[Arg.getName()] = Alloca; 1081 } 1082 1083 if (Value *RetVal = Body->codegen()) { 1084 // Finish off the function. 1085 Builder->CreateRet(RetVal); 1086 1087 // Validate the generated code, checking for consistency. 1088 verifyFunction(*TheFunction); 1089 1090 return TheFunction; 1091 } 1092 1093 // Error reading body, remove function. 1094 TheFunction->eraseFromParent(); 1095 1096 if (P.isBinaryOp()) 1097 BinopPrecedence.erase(P.getOperatorName()); 1098 return nullptr; 1099 } 1100 1101 //===----------------------------------------------------------------------===// 1102 // Top-Level parsing and JIT Driver 1103 //===----------------------------------------------------------------------===// 1104 1105 static void InitializeModule() { 1106 // Open a new module. 1107 TheModule = llvm::make_unique<Module>("my cool jit", *TheContext); 1108 TheModule->setDataLayout(TheJIT->getDataLayout()); 1109 1110 // Create a new builder for the module. 1111 Builder = llvm::make_unique<IRBuilder<>>(*TheContext); 1112 } 1113 1114 static void HandleDefinition() { 1115 if (auto FnAST = ParseDefinition()) { 1116 if (auto *FnIR = FnAST->codegen()) { 1117 fprintf(stderr, "Read function definition:"); 1118 FnIR->print(errs()); 1119 fprintf(stderr, "\n"); 1120 ExitOnErr(TheJIT->addModule(std::move(TheModule))); 1121 InitializeModule(); 1122 } 1123 } else { 1124 // Skip token for error recovery. 1125 getNextToken(); 1126 } 1127 } 1128 1129 static void HandleExtern() { 1130 if (auto ProtoAST = ParseExtern()) { 1131 if (auto *FnIR = ProtoAST->codegen()) { 1132 fprintf(stderr, "Read extern: "); 1133 FnIR->print(errs()); 1134 fprintf(stderr, "\n"); 1135 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); 1136 } 1137 } else { 1138 // Skip token for error recovery. 1139 getNextToken(); 1140 } 1141 } 1142 1143 static void HandleTopLevelExpression() { 1144 static unsigned ExprCount = 0; 1145 1146 // Update ExprCount. This number will be added to anonymous expressions to 1147 // prevent them from clashing. 1148 ++ExprCount; 1149 1150 // Evaluate a top-level expression into an anonymous function. 1151 if (auto FnAST = ParseTopLevelExpr(ExprCount)) { 1152 if (FnAST->codegen()) { 1153 // JIT the module containing the anonymous expression, keeping a handle so 1154 // we can free it later. 1155 ExitOnErr(TheJIT->addModule(std::move(TheModule))); 1156 InitializeModule(); 1157 1158 // Get the anonymous expression's JITSymbol. 1159 auto Sym = 1160 ExitOnErr(TheJIT->lookup(("__anon_expr" + Twine(ExprCount)).str())); 1161 1162 auto *FP = (double (*)())(intptr_t)Sym.getAddress(); 1163 assert(FP && "Failed to codegen function"); 1164 fprintf(stderr, "Evaluated to %f\n", FP()); 1165 } 1166 } else { 1167 // Skip token for error recovery. 1168 getNextToken(); 1169 } 1170 } 1171 1172 /// top ::= definition | external | expression | ';' 1173 static void MainLoop() { 1174 while (true) { 1175 fprintf(stderr, "ready> "); 1176 switch (CurTok) { 1177 case tok_eof: 1178 return; 1179 case ';': // ignore top-level semicolons. 1180 getNextToken(); 1181 break; 1182 case tok_def: 1183 HandleDefinition(); 1184 break; 1185 case tok_extern: 1186 HandleExtern(); 1187 break; 1188 default: 1189 HandleTopLevelExpression(); 1190 break; 1191 } 1192 } 1193 } 1194 1195 //===----------------------------------------------------------------------===// 1196 // "Library" functions that can be "extern'd" from user code. 1197 //===----------------------------------------------------------------------===// 1198 1199 /// putchard - putchar that takes a double and returns 0. 1200 extern "C" double putchard(double X) { 1201 fputc((char)X, stderr); 1202 return 0; 1203 } 1204 1205 /// printd - printf that takes a double prints it as "%f\n", returning 0. 1206 extern "C" double printd(double X) { 1207 fprintf(stderr, "%f\n", X); 1208 return 0; 1209 } 1210 1211 //===----------------------------------------------------------------------===// 1212 // Main driver code. 1213 //===----------------------------------------------------------------------===// 1214 1215 int main() { 1216 InitializeNativeTarget(); 1217 InitializeNativeTargetAsmPrinter(); 1218 InitializeNativeTargetAsmParser(); 1219 1220 // Install standard binary operators. 1221 // 1 is lowest precedence. 1222 BinopPrecedence['='] = 2; 1223 BinopPrecedence['<'] = 10; 1224 BinopPrecedence['+'] = 20; 1225 BinopPrecedence['-'] = 20; 1226 BinopPrecedence['*'] = 40; // highest. 1227 1228 // Prime the first token. 1229 fprintf(stderr, "ready> "); 1230 getNextToken(); 1231 1232 TheJIT = ExitOnErr(KaleidoscopeJIT::Create()); 1233 TheContext = &TheJIT->getContext(); 1234 1235 InitializeModule(); 1236 1237 // Run the main "interpreter loop" now. 1238 MainLoop(); 1239 1240 return 0; 1241 } 1242