1 #include "../include/KaleidoscopeJIT.h" 2 #include "llvm/ADT/APFloat.h" 3 #include "llvm/ADT/STLExtras.h" 4 #include "llvm/IR/BasicBlock.h" 5 #include "llvm/IR/Constants.h" 6 #include "llvm/IR/DerivedTypes.h" 7 #include "llvm/IR/Function.h" 8 #include "llvm/IR/IRBuilder.h" 9 #include "llvm/IR/Instructions.h" 10 #include "llvm/IR/LLVMContext.h" 11 #include "llvm/IR/Module.h" 12 #include "llvm/IR/PassManager.h" 13 #include "llvm/IR/Type.h" 14 #include "llvm/IR/Verifier.h" 15 #include "llvm/Passes/PassBuilder.h" 16 #include "llvm/Passes/StandardInstrumentations.h" 17 #include "llvm/Support/TargetSelect.h" 18 #include "llvm/Target/TargetMachine.h" 19 #include "llvm/Transforms/InstCombine/InstCombine.h" 20 #include "llvm/Transforms/Scalar.h" 21 #include "llvm/Transforms/Scalar/GVN.h" 22 #include "llvm/Transforms/Scalar/Reassociate.h" 23 #include "llvm/Transforms/Scalar/SimplifyCFG.h" 24 #include "llvm/Transforms/Utils/Mem2Reg.h" 25 #include <algorithm> 26 #include <cassert> 27 #include <cctype> 28 #include <cstdint> 29 #include <cstdio> 30 #include <cstdlib> 31 #include <map> 32 #include <memory> 33 #include <string> 34 #include <utility> 35 #include <vector> 36 37 using namespace llvm; 38 using namespace llvm::orc; 39 40 //===----------------------------------------------------------------------===// 41 // Lexer 42 //===----------------------------------------------------------------------===// 43 44 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 45 // of these for known things. 46 enum Token { 47 tok_eof = -1, 48 49 // commands 50 tok_def = -2, 51 tok_extern = -3, 52 53 // primary 54 tok_identifier = -4, 55 tok_number = -5, 56 57 // control 58 tok_if = -6, 59 tok_then = -7, 60 tok_else = -8, 61 tok_for = -9, 62 tok_in = -10, 63 64 // operators 65 tok_binary = -11, 66 tok_unary = -12, 67 68 // var definition 69 tok_var = -13 70 }; 71 72 static std::string IdentifierStr; // Filled in if tok_identifier 73 static double NumVal; // Filled in if tok_number 74 75 /// gettok - Return the next token from standard input. 76 static int gettok() { 77 static int LastChar = ' '; 78 79 // Skip any whitespace. 80 while (isspace(LastChar)) 81 LastChar = getchar(); 82 83 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 84 IdentifierStr = LastChar; 85 while (isalnum((LastChar = getchar()))) 86 IdentifierStr += LastChar; 87 88 if (IdentifierStr == "def") 89 return tok_def; 90 if (IdentifierStr == "extern") 91 return tok_extern; 92 if (IdentifierStr == "if") 93 return tok_if; 94 if (IdentifierStr == "then") 95 return tok_then; 96 if (IdentifierStr == "else") 97 return tok_else; 98 if (IdentifierStr == "for") 99 return tok_for; 100 if (IdentifierStr == "in") 101 return tok_in; 102 if (IdentifierStr == "binary") 103 return tok_binary; 104 if (IdentifierStr == "unary") 105 return tok_unary; 106 if (IdentifierStr == "var") 107 return tok_var; 108 return tok_identifier; 109 } 110 111 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 112 std::string NumStr; 113 do { 114 NumStr += LastChar; 115 LastChar = getchar(); 116 } while (isdigit(LastChar) || LastChar == '.'); 117 118 NumVal = strtod(NumStr.c_str(), nullptr); 119 return tok_number; 120 } 121 122 if (LastChar == '#') { 123 // Comment until end of line. 124 do 125 LastChar = getchar(); 126 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 127 128 if (LastChar != EOF) 129 return gettok(); 130 } 131 132 // Check for end of file. Don't eat the EOF. 133 if (LastChar == EOF) 134 return tok_eof; 135 136 // Otherwise, just return the character as its ascii value. 137 int ThisChar = LastChar; 138 LastChar = getchar(); 139 return ThisChar; 140 } 141 142 //===----------------------------------------------------------------------===// 143 // Abstract Syntax Tree (aka Parse Tree) 144 //===----------------------------------------------------------------------===// 145 146 namespace { 147 148 /// ExprAST - Base class for all expression nodes. 149 class ExprAST { 150 public: 151 virtual ~ExprAST() = default; 152 153 virtual Value *codegen() = 0; 154 }; 155 156 /// NumberExprAST - Expression class for numeric literals like "1.0". 157 class NumberExprAST : public ExprAST { 158 double Val; 159 160 public: 161 NumberExprAST(double Val) : Val(Val) {} 162 163 Value *codegen() override; 164 }; 165 166 /// VariableExprAST - Expression class for referencing a variable, like "a". 167 class VariableExprAST : public ExprAST { 168 std::string Name; 169 170 public: 171 VariableExprAST(const std::string &Name) : Name(Name) {} 172 173 Value *codegen() override; 174 const std::string &getName() const { return Name; } 175 }; 176 177 /// UnaryExprAST - Expression class for a unary operator. 178 class UnaryExprAST : public ExprAST { 179 char Opcode; 180 std::unique_ptr<ExprAST> Operand; 181 182 public: 183 UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) 184 : Opcode(Opcode), Operand(std::move(Operand)) {} 185 186 Value *codegen() override; 187 }; 188 189 /// BinaryExprAST - Expression class for a binary operator. 190 class BinaryExprAST : public ExprAST { 191 char Op; 192 std::unique_ptr<ExprAST> LHS, RHS; 193 194 public: 195 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 196 std::unique_ptr<ExprAST> RHS) 197 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 198 199 Value *codegen() override; 200 }; 201 202 /// CallExprAST - Expression class for function calls. 203 class CallExprAST : public ExprAST { 204 std::string Callee; 205 std::vector<std::unique_ptr<ExprAST>> Args; 206 207 public: 208 CallExprAST(const std::string &Callee, 209 std::vector<std::unique_ptr<ExprAST>> Args) 210 : Callee(Callee), Args(std::move(Args)) {} 211 212 Value *codegen() override; 213 }; 214 215 /// IfExprAST - Expression class for if/then/else. 216 class IfExprAST : public ExprAST { 217 std::unique_ptr<ExprAST> Cond, Then, Else; 218 219 public: 220 IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, 221 std::unique_ptr<ExprAST> Else) 222 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} 223 224 Value *codegen() override; 225 }; 226 227 /// ForExprAST - Expression class for for/in. 228 class ForExprAST : public ExprAST { 229 std::string VarName; 230 std::unique_ptr<ExprAST> Start, End, Step, Body; 231 232 public: 233 ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, 234 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, 235 std::unique_ptr<ExprAST> Body) 236 : VarName(VarName), Start(std::move(Start)), End(std::move(End)), 237 Step(std::move(Step)), Body(std::move(Body)) {} 238 239 Value *codegen() override; 240 }; 241 242 /// VarExprAST - Expression class for var/in 243 class VarExprAST : public ExprAST { 244 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 245 std::unique_ptr<ExprAST> Body; 246 247 public: 248 VarExprAST( 249 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, 250 std::unique_ptr<ExprAST> Body) 251 : VarNames(std::move(VarNames)), Body(std::move(Body)) {} 252 253 Value *codegen() override; 254 }; 255 256 /// PrototypeAST - This class represents the "prototype" for a function, 257 /// which captures its name, and its argument names (thus implicitly the number 258 /// of arguments the function takes), as well as if it is an operator. 259 class PrototypeAST { 260 std::string Name; 261 std::vector<std::string> Args; 262 bool IsOperator; 263 unsigned Precedence; // Precedence if a binary op. 264 265 public: 266 PrototypeAST(const std::string &Name, std::vector<std::string> Args, 267 bool IsOperator = false, unsigned Prec = 0) 268 : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), 269 Precedence(Prec) {} 270 271 Function *codegen(); 272 const std::string &getName() const { return Name; } 273 274 bool isUnaryOp() const { return IsOperator && Args.size() == 1; } 275 bool isBinaryOp() const { return IsOperator && Args.size() == 2; } 276 277 char getOperatorName() const { 278 assert(isUnaryOp() || isBinaryOp()); 279 return Name[Name.size() - 1]; 280 } 281 282 unsigned getBinaryPrecedence() const { return Precedence; } 283 }; 284 285 /// FunctionAST - This class represents a function definition itself. 286 class FunctionAST { 287 std::unique_ptr<PrototypeAST> Proto; 288 std::unique_ptr<ExprAST> Body; 289 290 public: 291 FunctionAST(std::unique_ptr<PrototypeAST> Proto, 292 std::unique_ptr<ExprAST> Body) 293 : Proto(std::move(Proto)), Body(std::move(Body)) {} 294 295 Function *codegen(); 296 }; 297 298 } // end anonymous namespace 299 300 //===----------------------------------------------------------------------===// 301 // Parser 302 //===----------------------------------------------------------------------===// 303 304 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 305 /// token the parser is looking at. getNextToken reads another token from the 306 /// lexer and updates CurTok with its results. 307 static int CurTok; 308 static int getNextToken() { return CurTok = gettok(); } 309 310 /// BinopPrecedence - This holds the precedence for each binary operator that is 311 /// defined. 312 static std::map<char, int> BinopPrecedence; 313 314 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 315 static int GetTokPrecedence() { 316 if (!isascii(CurTok)) 317 return -1; 318 319 // Make sure it's a declared binop. 320 int TokPrec = BinopPrecedence[CurTok]; 321 if (TokPrec <= 0) 322 return -1; 323 return TokPrec; 324 } 325 326 /// LogError* - These are little helper functions for error handling. 327 std::unique_ptr<ExprAST> LogError(const char *Str) { 328 fprintf(stderr, "Error: %s\n", Str); 329 return nullptr; 330 } 331 332 std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { 333 LogError(Str); 334 return nullptr; 335 } 336 337 static std::unique_ptr<ExprAST> ParseExpression(); 338 339 /// numberexpr ::= number 340 static std::unique_ptr<ExprAST> ParseNumberExpr() { 341 auto Result = std::make_unique<NumberExprAST>(NumVal); 342 getNextToken(); // consume the number 343 return std::move(Result); 344 } 345 346 /// parenexpr ::= '(' expression ')' 347 static std::unique_ptr<ExprAST> ParseParenExpr() { 348 getNextToken(); // eat (. 349 auto V = ParseExpression(); 350 if (!V) 351 return nullptr; 352 353 if (CurTok != ')') 354 return LogError("expected ')'"); 355 getNextToken(); // eat ). 356 return V; 357 } 358 359 /// identifierexpr 360 /// ::= identifier 361 /// ::= identifier '(' expression* ')' 362 static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 363 std::string IdName = IdentifierStr; 364 365 getNextToken(); // eat identifier. 366 367 if (CurTok != '(') // Simple variable ref. 368 return std::make_unique<VariableExprAST>(IdName); 369 370 // Call. 371 getNextToken(); // eat ( 372 std::vector<std::unique_ptr<ExprAST>> Args; 373 if (CurTok != ')') { 374 while (true) { 375 if (auto Arg = ParseExpression()) 376 Args.push_back(std::move(Arg)); 377 else 378 return nullptr; 379 380 if (CurTok == ')') 381 break; 382 383 if (CurTok != ',') 384 return LogError("Expected ')' or ',' in argument list"); 385 getNextToken(); 386 } 387 } 388 389 // Eat the ')'. 390 getNextToken(); 391 392 return std::make_unique<CallExprAST>(IdName, std::move(Args)); 393 } 394 395 /// ifexpr ::= 'if' expression 'then' expression 'else' expression 396 static std::unique_ptr<ExprAST> ParseIfExpr() { 397 getNextToken(); // eat the if. 398 399 // condition. 400 auto Cond = ParseExpression(); 401 if (!Cond) 402 return nullptr; 403 404 if (CurTok != tok_then) 405 return LogError("expected then"); 406 getNextToken(); // eat the then 407 408 auto Then = ParseExpression(); 409 if (!Then) 410 return nullptr; 411 412 if (CurTok != tok_else) 413 return LogError("expected else"); 414 415 getNextToken(); 416 417 auto Else = ParseExpression(); 418 if (!Else) 419 return nullptr; 420 421 return std::make_unique<IfExprAST>(std::move(Cond), std::move(Then), 422 std::move(Else)); 423 } 424 425 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression 426 static std::unique_ptr<ExprAST> ParseForExpr() { 427 getNextToken(); // eat the for. 428 429 if (CurTok != tok_identifier) 430 return LogError("expected identifier after for"); 431 432 std::string IdName = IdentifierStr; 433 getNextToken(); // eat identifier. 434 435 if (CurTok != '=') 436 return LogError("expected '=' after for"); 437 getNextToken(); // eat '='. 438 439 auto Start = ParseExpression(); 440 if (!Start) 441 return nullptr; 442 if (CurTok != ',') 443 return LogError("expected ',' after for start value"); 444 getNextToken(); 445 446 auto End = ParseExpression(); 447 if (!End) 448 return nullptr; 449 450 // The step value is optional. 451 std::unique_ptr<ExprAST> Step; 452 if (CurTok == ',') { 453 getNextToken(); 454 Step = ParseExpression(); 455 if (!Step) 456 return nullptr; 457 } 458 459 if (CurTok != tok_in) 460 return LogError("expected 'in' after for"); 461 getNextToken(); // eat 'in'. 462 463 auto Body = ParseExpression(); 464 if (!Body) 465 return nullptr; 466 467 return std::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), 468 std::move(Step), std::move(Body)); 469 } 470 471 /// varexpr ::= 'var' identifier ('=' expression)? 472 // (',' identifier ('=' expression)?)* 'in' expression 473 static std::unique_ptr<ExprAST> ParseVarExpr() { 474 getNextToken(); // eat the var. 475 476 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 477 478 // At least one variable name is required. 479 if (CurTok != tok_identifier) 480 return LogError("expected identifier after var"); 481 482 while (true) { 483 std::string Name = IdentifierStr; 484 getNextToken(); // eat identifier. 485 486 // Read the optional initializer. 487 std::unique_ptr<ExprAST> Init = nullptr; 488 if (CurTok == '=') { 489 getNextToken(); // eat the '='. 490 491 Init = ParseExpression(); 492 if (!Init) 493 return nullptr; 494 } 495 496 VarNames.push_back(std::make_pair(Name, std::move(Init))); 497 498 // End of var list, exit loop. 499 if (CurTok != ',') 500 break; 501 getNextToken(); // eat the ','. 502 503 if (CurTok != tok_identifier) 504 return LogError("expected identifier list after var"); 505 } 506 507 // At this point, we have to have 'in'. 508 if (CurTok != tok_in) 509 return LogError("expected 'in' keyword after 'var'"); 510 getNextToken(); // eat 'in'. 511 512 auto Body = ParseExpression(); 513 if (!Body) 514 return nullptr; 515 516 return std::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); 517 } 518 519 /// primary 520 /// ::= identifierexpr 521 /// ::= numberexpr 522 /// ::= parenexpr 523 /// ::= ifexpr 524 /// ::= forexpr 525 /// ::= varexpr 526 static std::unique_ptr<ExprAST> ParsePrimary() { 527 switch (CurTok) { 528 default: 529 return LogError("unknown token when expecting an expression"); 530 case tok_identifier: 531 return ParseIdentifierExpr(); 532 case tok_number: 533 return ParseNumberExpr(); 534 case '(': 535 return ParseParenExpr(); 536 case tok_if: 537 return ParseIfExpr(); 538 case tok_for: 539 return ParseForExpr(); 540 case tok_var: 541 return ParseVarExpr(); 542 } 543 } 544 545 /// unary 546 /// ::= primary 547 /// ::= '!' unary 548 static std::unique_ptr<ExprAST> ParseUnary() { 549 // If the current token is not an operator, it must be a primary expr. 550 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') 551 return ParsePrimary(); 552 553 // If this is a unary operator, read it. 554 int Opc = CurTok; 555 getNextToken(); 556 if (auto Operand = ParseUnary()) 557 return std::make_unique<UnaryExprAST>(Opc, std::move(Operand)); 558 return nullptr; 559 } 560 561 /// binoprhs 562 /// ::= ('+' unary)* 563 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 564 std::unique_ptr<ExprAST> LHS) { 565 // If this is a binop, find its precedence. 566 while (true) { 567 int TokPrec = GetTokPrecedence(); 568 569 // If this is a binop that binds at least as tightly as the current binop, 570 // consume it, otherwise we are done. 571 if (TokPrec < ExprPrec) 572 return LHS; 573 574 // Okay, we know this is a binop. 575 int BinOp = CurTok; 576 getNextToken(); // eat binop 577 578 // Parse the unary expression after the binary operator. 579 auto RHS = ParseUnary(); 580 if (!RHS) 581 return nullptr; 582 583 // If BinOp binds less tightly with RHS than the operator after RHS, let 584 // the pending operator take RHS as its LHS. 585 int NextPrec = GetTokPrecedence(); 586 if (TokPrec < NextPrec) { 587 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); 588 if (!RHS) 589 return nullptr; 590 } 591 592 // Merge LHS/RHS. 593 LHS = 594 std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); 595 } 596 } 597 598 /// expression 599 /// ::= unary binoprhs 600 /// 601 static std::unique_ptr<ExprAST> ParseExpression() { 602 auto LHS = ParseUnary(); 603 if (!LHS) 604 return nullptr; 605 606 return ParseBinOpRHS(0, std::move(LHS)); 607 } 608 609 /// prototype 610 /// ::= id '(' id* ')' 611 /// ::= binary LETTER number? (id, id) 612 /// ::= unary LETTER (id) 613 static std::unique_ptr<PrototypeAST> ParsePrototype() { 614 std::string FnName; 615 616 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 617 unsigned BinaryPrecedence = 30; 618 619 switch (CurTok) { 620 default: 621 return LogErrorP("Expected function name in prototype"); 622 case tok_identifier: 623 FnName = IdentifierStr; 624 Kind = 0; 625 getNextToken(); 626 break; 627 case tok_unary: 628 getNextToken(); 629 if (!isascii(CurTok)) 630 return LogErrorP("Expected unary operator"); 631 FnName = "unary"; 632 FnName += (char)CurTok; 633 Kind = 1; 634 getNextToken(); 635 break; 636 case tok_binary: 637 getNextToken(); 638 if (!isascii(CurTok)) 639 return LogErrorP("Expected binary operator"); 640 FnName = "binary"; 641 FnName += (char)CurTok; 642 Kind = 2; 643 getNextToken(); 644 645 // Read the precedence if present. 646 if (CurTok == tok_number) { 647 if (NumVal < 1 || NumVal > 100) 648 return LogErrorP("Invalid precedence: must be 1..100"); 649 BinaryPrecedence = (unsigned)NumVal; 650 getNextToken(); 651 } 652 break; 653 } 654 655 if (CurTok != '(') 656 return LogErrorP("Expected '(' in prototype"); 657 658 std::vector<std::string> ArgNames; 659 while (getNextToken() == tok_identifier) 660 ArgNames.push_back(IdentifierStr); 661 if (CurTok != ')') 662 return LogErrorP("Expected ')' in prototype"); 663 664 // success. 665 getNextToken(); // eat ')'. 666 667 // Verify right number of names for operator. 668 if (Kind && ArgNames.size() != Kind) 669 return LogErrorP("Invalid number of operands for operator"); 670 671 return std::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0, 672 BinaryPrecedence); 673 } 674 675 /// definition ::= 'def' prototype expression 676 static std::unique_ptr<FunctionAST> ParseDefinition() { 677 getNextToken(); // eat def. 678 auto Proto = ParsePrototype(); 679 if (!Proto) 680 return nullptr; 681 682 if (auto E = ParseExpression()) 683 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 684 return nullptr; 685 } 686 687 /// toplevelexpr ::= expression 688 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { 689 if (auto E = ParseExpression()) { 690 // Make an anonymous proto. 691 auto Proto = std::make_unique<PrototypeAST>("__anon_expr", 692 std::vector<std::string>()); 693 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 694 } 695 return nullptr; 696 } 697 698 /// external ::= 'extern' prototype 699 static std::unique_ptr<PrototypeAST> ParseExtern() { 700 getNextToken(); // eat extern. 701 return ParsePrototype(); 702 } 703 704 //===----------------------------------------------------------------------===// 705 // Code Generation 706 //===----------------------------------------------------------------------===// 707 708 static std::unique_ptr<LLVMContext> TheContext; 709 static std::unique_ptr<Module> TheModule; 710 static std::unique_ptr<IRBuilder<>> Builder; 711 static std::map<std::string, AllocaInst *> NamedValues; 712 static std::unique_ptr<KaleidoscopeJIT> TheJIT; 713 static std::unique_ptr<FunctionPassManager> TheFPM; 714 static std::unique_ptr<LoopAnalysisManager> TheLAM; 715 static std::unique_ptr<FunctionAnalysisManager> TheFAM; 716 static std::unique_ptr<CGSCCAnalysisManager> TheCGAM; 717 static std::unique_ptr<ModuleAnalysisManager> TheMAM; 718 static std::unique_ptr<PassInstrumentationCallbacks> ThePIC; 719 static std::unique_ptr<StandardInstrumentations> TheSI; 720 static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; 721 static ExitOnError ExitOnErr; 722 723 Value *LogErrorV(const char *Str) { 724 LogError(Str); 725 return nullptr; 726 } 727 728 Function *getFunction(std::string Name) { 729 // First, see if the function has already been added to the current module. 730 if (auto *F = TheModule->getFunction(Name)) 731 return F; 732 733 // If not, check whether we can codegen the declaration from some existing 734 // prototype. 735 auto FI = FunctionProtos.find(Name); 736 if (FI != FunctionProtos.end()) 737 return FI->second->codegen(); 738 739 // If no existing prototype exists, return null. 740 return nullptr; 741 } 742 743 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of 744 /// the function. This is used for mutable variables etc. 745 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, 746 StringRef VarName) { 747 IRBuilder<> TmpB(&TheFunction->getEntryBlock(), 748 TheFunction->getEntryBlock().begin()); 749 return TmpB.CreateAlloca(Type::getDoubleTy(*TheContext), nullptr, VarName); 750 } 751 752 Value *NumberExprAST::codegen() { 753 return ConstantFP::get(*TheContext, APFloat(Val)); 754 } 755 756 Value *VariableExprAST::codegen() { 757 // Look this variable up in the function. 758 AllocaInst *A = NamedValues[Name]; 759 if (!A) 760 return LogErrorV("Unknown variable name"); 761 762 // Load the value. 763 return Builder->CreateLoad(A->getAllocatedType(), A, Name.c_str()); 764 } 765 766 Value *UnaryExprAST::codegen() { 767 Value *OperandV = Operand->codegen(); 768 if (!OperandV) 769 return nullptr; 770 771 Function *F = getFunction(std::string("unary") + Opcode); 772 if (!F) 773 return LogErrorV("Unknown unary operator"); 774 775 return Builder->CreateCall(F, OperandV, "unop"); 776 } 777 778 Value *BinaryExprAST::codegen() { 779 // Special case '=' because we don't want to emit the LHS as an expression. 780 if (Op == '=') { 781 // Assignment requires the LHS to be an identifier. 782 // This assume we're building without RTTI because LLVM builds that way by 783 // default. If you build LLVM with RTTI this can be changed to a 784 // dynamic_cast for automatic error checking. 785 VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); 786 if (!LHSE) 787 return LogErrorV("destination of '=' must be a variable"); 788 // Codegen the RHS. 789 Value *Val = RHS->codegen(); 790 if (!Val) 791 return nullptr; 792 793 // Look up the name. 794 Value *Variable = NamedValues[LHSE->getName()]; 795 if (!Variable) 796 return LogErrorV("Unknown variable name"); 797 798 Builder->CreateStore(Val, Variable); 799 return Val; 800 } 801 802 Value *L = LHS->codegen(); 803 Value *R = RHS->codegen(); 804 if (!L || !R) 805 return nullptr; 806 807 switch (Op) { 808 case '+': 809 return Builder->CreateFAdd(L, R, "addtmp"); 810 case '-': 811 return Builder->CreateFSub(L, R, "subtmp"); 812 case '*': 813 return Builder->CreateFMul(L, R, "multmp"); 814 case '<': 815 L = Builder->CreateFCmpULT(L, R, "cmptmp"); 816 // Convert bool 0/1 to double 0.0 or 1.0 817 return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp"); 818 default: 819 break; 820 } 821 822 // If it wasn't a builtin binary operator, it must be a user defined one. Emit 823 // a call to it. 824 Function *F = getFunction(std::string("binary") + Op); 825 assert(F && "binary operator not found!"); 826 827 Value *Ops[] = {L, R}; 828 return Builder->CreateCall(F, Ops, "binop"); 829 } 830 831 Value *CallExprAST::codegen() { 832 // Look up the name in the global module table. 833 Function *CalleeF = getFunction(Callee); 834 if (!CalleeF) 835 return LogErrorV("Unknown function referenced"); 836 837 // If argument mismatch error. 838 if (CalleeF->arg_size() != Args.size()) 839 return LogErrorV("Incorrect # arguments passed"); 840 841 std::vector<Value *> ArgsV; 842 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 843 ArgsV.push_back(Args[i]->codegen()); 844 if (!ArgsV.back()) 845 return nullptr; 846 } 847 848 return Builder->CreateCall(CalleeF, ArgsV, "calltmp"); 849 } 850 851 Value *IfExprAST::codegen() { 852 Value *CondV = Cond->codegen(); 853 if (!CondV) 854 return nullptr; 855 856 // Convert condition to a bool by comparing non-equal to 0.0. 857 CondV = Builder->CreateFCmpONE( 858 CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond"); 859 860 Function *TheFunction = Builder->GetInsertBlock()->getParent(); 861 862 // Create blocks for the then and else cases. Insert the 'then' block at the 863 // end of the function. 864 BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction); 865 BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else"); 866 BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont"); 867 868 Builder->CreateCondBr(CondV, ThenBB, ElseBB); 869 870 // Emit then value. 871 Builder->SetInsertPoint(ThenBB); 872 873 Value *ThenV = Then->codegen(); 874 if (!ThenV) 875 return nullptr; 876 877 Builder->CreateBr(MergeBB); 878 // Codegen of 'Then' can change the current block, update ThenBB for the PHI. 879 ThenBB = Builder->GetInsertBlock(); 880 881 // Emit else block. 882 TheFunction->insert(TheFunction->end(), ElseBB); 883 Builder->SetInsertPoint(ElseBB); 884 885 Value *ElseV = Else->codegen(); 886 if (!ElseV) 887 return nullptr; 888 889 Builder->CreateBr(MergeBB); 890 // Codegen of 'Else' can change the current block, update ElseBB for the PHI. 891 ElseBB = Builder->GetInsertBlock(); 892 893 // Emit merge block. 894 TheFunction->insert(TheFunction->end(), MergeBB); 895 Builder->SetInsertPoint(MergeBB); 896 PHINode *PN = Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, "iftmp"); 897 898 PN->addIncoming(ThenV, ThenBB); 899 PN->addIncoming(ElseV, ElseBB); 900 return PN; 901 } 902 903 // Output for-loop as: 904 // var = alloca double 905 // ... 906 // start = startexpr 907 // store start -> var 908 // goto loop 909 // loop: 910 // ... 911 // bodyexpr 912 // ... 913 // loopend: 914 // step = stepexpr 915 // endcond = endexpr 916 // 917 // curvar = load var 918 // nextvar = curvar + step 919 // store nextvar -> var 920 // br endcond, loop, endloop 921 // outloop: 922 Value *ForExprAST::codegen() { 923 Function *TheFunction = Builder->GetInsertBlock()->getParent(); 924 925 // Create an alloca for the variable in the entry block. 926 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 927 928 // Emit the start code first, without 'variable' in scope. 929 Value *StartVal = Start->codegen(); 930 if (!StartVal) 931 return nullptr; 932 933 // Store the value into the alloca. 934 Builder->CreateStore(StartVal, Alloca); 935 936 // Make the new basic block for the loop header, inserting after current 937 // block. 938 BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction); 939 940 // Insert an explicit fall through from the current block to the LoopBB. 941 Builder->CreateBr(LoopBB); 942 943 // Start insertion in LoopBB. 944 Builder->SetInsertPoint(LoopBB); 945 946 // Within the loop, the variable is defined equal to the PHI node. If it 947 // shadows an existing variable, we have to restore it, so save it now. 948 AllocaInst *OldVal = NamedValues[VarName]; 949 NamedValues[VarName] = Alloca; 950 951 // Emit the body of the loop. This, like any other expr, can change the 952 // current BB. Note that we ignore the value computed by the body, but don't 953 // allow an error. 954 if (!Body->codegen()) 955 return nullptr; 956 957 // Emit the step value. 958 Value *StepVal = nullptr; 959 if (Step) { 960 StepVal = Step->codegen(); 961 if (!StepVal) 962 return nullptr; 963 } else { 964 // If not specified, use 1.0. 965 StepVal = ConstantFP::get(*TheContext, APFloat(1.0)); 966 } 967 968 // Compute the end condition. 969 Value *EndCond = End->codegen(); 970 if (!EndCond) 971 return nullptr; 972 973 // Reload, increment, and restore the alloca. This handles the case where 974 // the body of the loop mutates the variable. 975 Value *CurVar = 976 Builder->CreateLoad(Alloca->getAllocatedType(), Alloca, VarName.c_str()); 977 Value *NextVar = Builder->CreateFAdd(CurVar, StepVal, "nextvar"); 978 Builder->CreateStore(NextVar, Alloca); 979 980 // Convert condition to a bool by comparing non-equal to 0.0. 981 EndCond = Builder->CreateFCmpONE( 982 EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond"); 983 984 // Create the "after loop" block and insert it. 985 BasicBlock *AfterBB = 986 BasicBlock::Create(*TheContext, "afterloop", TheFunction); 987 988 // Insert the conditional branch into the end of LoopEndBB. 989 Builder->CreateCondBr(EndCond, LoopBB, AfterBB); 990 991 // Any new code will be inserted in AfterBB. 992 Builder->SetInsertPoint(AfterBB); 993 994 // Restore the unshadowed variable. 995 if (OldVal) 996 NamedValues[VarName] = OldVal; 997 else 998 NamedValues.erase(VarName); 999 1000 // for expr always returns 0.0. 1001 return Constant::getNullValue(Type::getDoubleTy(*TheContext)); 1002 } 1003 1004 Value *VarExprAST::codegen() { 1005 std::vector<AllocaInst *> OldBindings; 1006 1007 Function *TheFunction = Builder->GetInsertBlock()->getParent(); 1008 1009 // Register all variables and emit their initializer. 1010 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { 1011 const std::string &VarName = VarNames[i].first; 1012 ExprAST *Init = VarNames[i].second.get(); 1013 1014 // Emit the initializer before adding the variable to scope, this prevents 1015 // the initializer from referencing the variable itself, and permits stuff 1016 // like this: 1017 // var a = 1 in 1018 // var a = a in ... # refers to outer 'a'. 1019 Value *InitVal; 1020 if (Init) { 1021 InitVal = Init->codegen(); 1022 if (!InitVal) 1023 return nullptr; 1024 } else { // If not specified, use 0.0. 1025 InitVal = ConstantFP::get(*TheContext, APFloat(0.0)); 1026 } 1027 1028 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 1029 Builder->CreateStore(InitVal, Alloca); 1030 1031 // Remember the old variable binding so that we can restore the binding when 1032 // we unrecurse. 1033 OldBindings.push_back(NamedValues[VarName]); 1034 1035 // Remember this binding. 1036 NamedValues[VarName] = Alloca; 1037 } 1038 1039 // Codegen the body, now that all vars are in scope. 1040 Value *BodyVal = Body->codegen(); 1041 if (!BodyVal) 1042 return nullptr; 1043 1044 // Pop all our variables from scope. 1045 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) 1046 NamedValues[VarNames[i].first] = OldBindings[i]; 1047 1048 // Return the body computation. 1049 return BodyVal; 1050 } 1051 1052 Function *PrototypeAST::codegen() { 1053 // Make the function type: double(double,double) etc. 1054 std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(*TheContext)); 1055 FunctionType *FT = 1056 FunctionType::get(Type::getDoubleTy(*TheContext), Doubles, false); 1057 1058 Function *F = 1059 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); 1060 1061 // Set names for all arguments. 1062 unsigned Idx = 0; 1063 for (auto &Arg : F->args()) 1064 Arg.setName(Args[Idx++]); 1065 1066 return F; 1067 } 1068 1069 Function *FunctionAST::codegen() { 1070 // Transfer ownership of the prototype to the FunctionProtos map, but keep a 1071 // reference to it for use below. 1072 auto &P = *Proto; 1073 FunctionProtos[Proto->getName()] = std::move(Proto); 1074 Function *TheFunction = getFunction(P.getName()); 1075 if (!TheFunction) 1076 return nullptr; 1077 1078 // If this is an operator, install it. 1079 if (P.isBinaryOp()) 1080 BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); 1081 1082 // Create a new basic block to start insertion into. 1083 BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction); 1084 Builder->SetInsertPoint(BB); 1085 1086 // Record the function arguments in the NamedValues map. 1087 NamedValues.clear(); 1088 for (auto &Arg : TheFunction->args()) { 1089 // Create an alloca for this variable. 1090 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); 1091 1092 // Store the initial value into the alloca. 1093 Builder->CreateStore(&Arg, Alloca); 1094 1095 // Add arguments to variable symbol table. 1096 NamedValues[std::string(Arg.getName())] = Alloca; 1097 } 1098 1099 if (Value *RetVal = Body->codegen()) { 1100 // Finish off the function. 1101 Builder->CreateRet(RetVal); 1102 1103 // Validate the generated code, checking for consistency. 1104 verifyFunction(*TheFunction); 1105 1106 // Run the optimizer on the function. 1107 TheFPM->run(*TheFunction, *TheFAM); 1108 1109 return TheFunction; 1110 } 1111 1112 // Error reading body, remove function. 1113 TheFunction->eraseFromParent(); 1114 1115 if (P.isBinaryOp()) 1116 BinopPrecedence.erase(P.getOperatorName()); 1117 return nullptr; 1118 } 1119 1120 //===----------------------------------------------------------------------===// 1121 // Top-Level parsing and JIT Driver 1122 //===----------------------------------------------------------------------===// 1123 1124 static void InitializeModuleAndManagers() { 1125 // Open a new context and module. 1126 TheContext = std::make_unique<LLVMContext>(); 1127 TheModule = std::make_unique<Module>("KaleidoscopeJIT", *TheContext); 1128 TheModule->setDataLayout(TheJIT->getDataLayout()); 1129 1130 // Create a new builder for the module. 1131 Builder = std::make_unique<IRBuilder<>>(*TheContext); 1132 1133 // Create new pass and analysis managers. 1134 TheFPM = std::make_unique<FunctionPassManager>(); 1135 TheLAM = std::make_unique<LoopAnalysisManager>(); 1136 TheFAM = std::make_unique<FunctionAnalysisManager>(); 1137 TheCGAM = std::make_unique<CGSCCAnalysisManager>(); 1138 TheMAM = std::make_unique<ModuleAnalysisManager>(); 1139 ThePIC = std::make_unique<PassInstrumentationCallbacks>(); 1140 TheSI = std::make_unique<StandardInstrumentations>(*TheContext, 1141 /*DebugLogging*/ true); 1142 TheSI->registerCallbacks(*ThePIC, TheMAM.get()); 1143 1144 // Add transform passes. 1145 // Promote allocas to registers. 1146 TheFPM->addPass(PromotePass()); 1147 // Do simple "peephole" optimizations and bit-twiddling optzns. 1148 TheFPM->addPass(InstCombinePass()); 1149 // Reassociate expressions. 1150 TheFPM->addPass(ReassociatePass()); 1151 // Eliminate Common SubExpressions. 1152 TheFPM->addPass(GVNPass()); 1153 // Simplify the control flow graph (deleting unreachable blocks, etc). 1154 TheFPM->addPass(SimplifyCFGPass()); 1155 1156 // Register analysis passes used in these transform passes. 1157 PassBuilder PB; 1158 PB.registerModuleAnalyses(*TheMAM); 1159 PB.registerFunctionAnalyses(*TheFAM); 1160 PB.crossRegisterProxies(*TheLAM, *TheFAM, *TheCGAM, *TheMAM); 1161 } 1162 1163 static void HandleDefinition() { 1164 if (auto FnAST = ParseDefinition()) { 1165 if (auto *FnIR = FnAST->codegen()) { 1166 fprintf(stderr, "Read function definition:"); 1167 FnIR->print(errs()); 1168 fprintf(stderr, "\n"); 1169 ExitOnErr(TheJIT->addModule( 1170 ThreadSafeModule(std::move(TheModule), std::move(TheContext)))); 1171 InitializeModuleAndManagers(); 1172 } 1173 } else { 1174 // Skip token for error recovery. 1175 getNextToken(); 1176 } 1177 } 1178 1179 static void HandleExtern() { 1180 if (auto ProtoAST = ParseExtern()) { 1181 if (auto *FnIR = ProtoAST->codegen()) { 1182 fprintf(stderr, "Read extern: "); 1183 FnIR->print(errs()); 1184 fprintf(stderr, "\n"); 1185 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); 1186 } 1187 } else { 1188 // Skip token for error recovery. 1189 getNextToken(); 1190 } 1191 } 1192 1193 static void HandleTopLevelExpression() { 1194 // Evaluate a top-level expression into an anonymous function. 1195 if (auto FnAST = ParseTopLevelExpr()) { 1196 if (FnAST->codegen()) { 1197 // Create a ResourceTracker to track JIT'd memory allocated to our 1198 // anonymous expression -- that way we can free it after executing. 1199 auto RT = TheJIT->getMainJITDylib().createResourceTracker(); 1200 1201 auto TSM = ThreadSafeModule(std::move(TheModule), std::move(TheContext)); 1202 ExitOnErr(TheJIT->addModule(std::move(TSM), RT)); 1203 InitializeModuleAndManagers(); 1204 1205 // Search the JIT for the __anon_expr symbol. 1206 auto ExprSymbol = ExitOnErr(TheJIT->lookup("__anon_expr")); 1207 1208 // Get the symbol's address and cast it to the right type (takes no 1209 // arguments, returns a double) so we can call it as a native function. 1210 double (*FP)() = ExprSymbol.toPtr<double (*)()>(); 1211 fprintf(stderr, "Evaluated to %f\n", FP()); 1212 1213 // Delete the anonymous expression module from the JIT. 1214 ExitOnErr(RT->remove()); 1215 } 1216 } else { 1217 // Skip token for error recovery. 1218 getNextToken(); 1219 } 1220 } 1221 1222 /// top ::= definition | external | expression | ';' 1223 static void MainLoop() { 1224 while (true) { 1225 fprintf(stderr, "ready> "); 1226 switch (CurTok) { 1227 case tok_eof: 1228 return; 1229 case ';': // ignore top-level semicolons. 1230 getNextToken(); 1231 break; 1232 case tok_def: 1233 HandleDefinition(); 1234 break; 1235 case tok_extern: 1236 HandleExtern(); 1237 break; 1238 default: 1239 HandleTopLevelExpression(); 1240 break; 1241 } 1242 } 1243 } 1244 1245 //===----------------------------------------------------------------------===// 1246 // "Library" functions that can be "extern'd" from user code. 1247 //===----------------------------------------------------------------------===// 1248 1249 #ifdef _WIN32 1250 #define DLLEXPORT __declspec(dllexport) 1251 #else 1252 #define DLLEXPORT 1253 #endif 1254 1255 /// putchard - putchar that takes a double and returns 0. 1256 extern "C" DLLEXPORT double putchard(double X) { 1257 fputc((char)X, stderr); 1258 return 0; 1259 } 1260 1261 /// printd - printf that takes a double prints it as "%f\n", returning 0. 1262 extern "C" DLLEXPORT double printd(double X) { 1263 fprintf(stderr, "%f\n", X); 1264 return 0; 1265 } 1266 1267 //===----------------------------------------------------------------------===// 1268 // Main driver code. 1269 //===----------------------------------------------------------------------===// 1270 1271 int main() { 1272 InitializeNativeTarget(); 1273 InitializeNativeTargetAsmPrinter(); 1274 InitializeNativeTargetAsmParser(); 1275 1276 // Install standard binary operators. 1277 // 1 is lowest precedence. 1278 BinopPrecedence['='] = 2; 1279 BinopPrecedence['<'] = 10; 1280 BinopPrecedence['+'] = 20; 1281 BinopPrecedence['-'] = 20; 1282 BinopPrecedence['*'] = 40; // highest. 1283 1284 // Prime the first token. 1285 fprintf(stderr, "ready> "); 1286 getNextToken(); 1287 1288 TheJIT = ExitOnErr(KaleidoscopeJIT::Create()); 1289 1290 InitializeModuleAndManagers(); 1291 1292 // Run the main "interpreter loop" now. 1293 MainLoop(); 1294 1295 return 0; 1296 } 1297