#include "llvm/Analysis/Passes.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/PassManager.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Transforms/Scalar.h"
#include <cassert>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <map>
#include <string>
#include <vector>
19 using namespace llvm;
20
//===----------------------------------------------------------------------===//
// Lexer
//===----------------------------------------------------------------------===//
24
// Token codes produced by the lexer. A character the lexer does not recognise
// is returned as its own value in [0-255]; every recognised token kind uses
// one of the negative codes below.
enum Token {
  tok_eof = -1, // end of input

  // commands
  tok_def = -2,    // "def"
  tok_extern = -3, // "extern"

  // primary
  tok_identifier = -4, // text is left in IdentifierStr
  tok_number = -5,     // value is left in NumVal

  // control
  tok_if = -6,   // "if"
  tok_then = -7, // "then"
  tok_else = -8, // "else"
  tok_for = -9,  // "for"
  tok_in = -10,  // "in"

  // operators
  tok_binary = -11, // "binary"
  tok_unary = -12,  // "unary"

  // var definition
  tok_var = -13 // "var"
};
52
// Payload of the most recent token. gettok() stores the spelling of every
// word it lexes (identifier or keyword) in IdentifierStr, and the value of
// every number it lexes in NumVal.
static std::string IdentifierStr;
static double NumVal;
55
56 /// gettok - Return the next token from standard input.
gettok()57 static int gettok() {
58 static int LastChar = ' ';
59
60 // Skip any whitespace.
61 while (isspace(LastChar))
62 LastChar = getchar();
63
64 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
65 IdentifierStr = LastChar;
66 while (isalnum((LastChar = getchar())))
67 IdentifierStr += LastChar;
68
69 if (IdentifierStr == "def")
70 return tok_def;
71 if (IdentifierStr == "extern")
72 return tok_extern;
73 if (IdentifierStr == "if")
74 return tok_if;
75 if (IdentifierStr == "then")
76 return tok_then;
77 if (IdentifierStr == "else")
78 return tok_else;
79 if (IdentifierStr == "for")
80 return tok_for;
81 if (IdentifierStr == "in")
82 return tok_in;
83 if (IdentifierStr == "binary")
84 return tok_binary;
85 if (IdentifierStr == "unary")
86 return tok_unary;
87 if (IdentifierStr == "var")
88 return tok_var;
89 return tok_identifier;
90 }
91
92 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
93 std::string NumStr;
94 do {
95 NumStr += LastChar;
96 LastChar = getchar();
97 } while (isdigit(LastChar) || LastChar == '.');
98
99 NumVal = strtod(NumStr.c_str(), 0);
100 return tok_number;
101 }
102
103 if (LastChar == '#') {
104 // Comment until end of line.
105 do
106 LastChar = getchar();
107 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
108
109 if (LastChar != EOF)
110 return gettok();
111 }
112
113 // Check for end of file. Don't eat the EOF.
114 if (LastChar == EOF)
115 return tok_eof;
116
117 // Otherwise, just return the character as its ascii value.
118 int ThisChar = LastChar;
119 LastChar = getchar();
120 return ThisChar;
121 }
122
//===----------------------------------------------------------------------===//
// Abstract Syntax Tree (aka Parse Tree)
//===----------------------------------------------------------------------===//
126 namespace {
127 /// ExprAST - Base class for all expression nodes.
128 class ExprAST {
129 public:
~ExprAST()130 virtual ~ExprAST() {}
131 virtual Value *Codegen() = 0;
132 };
133
134 /// NumberExprAST - Expression class for numeric literals like "1.0".
135 class NumberExprAST : public ExprAST {
136 double Val;
137
138 public:
NumberExprAST(double val)139 NumberExprAST(double val) : Val(val) {}
140 virtual Value *Codegen();
141 };
142
143 /// VariableExprAST - Expression class for referencing a variable, like "a".
144 class VariableExprAST : public ExprAST {
145 std::string Name;
146
147 public:
VariableExprAST(const std::string & name)148 VariableExprAST(const std::string &name) : Name(name) {}
getName() const149 const std::string &getName() const { return Name; }
150 virtual Value *Codegen();
151 };
152
153 /// UnaryExprAST - Expression class for a unary operator.
154 class UnaryExprAST : public ExprAST {
155 char Opcode;
156 ExprAST *Operand;
157
158 public:
UnaryExprAST(char opcode,ExprAST * operand)159 UnaryExprAST(char opcode, ExprAST *operand)
160 : Opcode(opcode), Operand(operand) {}
161 virtual Value *Codegen();
162 };
163
164 /// BinaryExprAST - Expression class for a binary operator.
165 class BinaryExprAST : public ExprAST {
166 char Op;
167 ExprAST *LHS, *RHS;
168
169 public:
BinaryExprAST(char op,ExprAST * lhs,ExprAST * rhs)170 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
171 : Op(op), LHS(lhs), RHS(rhs) {}
172 virtual Value *Codegen();
173 };
174
175 /// CallExprAST - Expression class for function calls.
176 class CallExprAST : public ExprAST {
177 std::string Callee;
178 std::vector<ExprAST *> Args;
179
180 public:
CallExprAST(const std::string & callee,std::vector<ExprAST * > & args)181 CallExprAST(const std::string &callee, std::vector<ExprAST *> &args)
182 : Callee(callee), Args(args) {}
183 virtual Value *Codegen();
184 };
185
186 /// IfExprAST - Expression class for if/then/else.
187 class IfExprAST : public ExprAST {
188 ExprAST *Cond, *Then, *Else;
189
190 public:
IfExprAST(ExprAST * cond,ExprAST * then,ExprAST * _else)191 IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
192 : Cond(cond), Then(then), Else(_else) {}
193 virtual Value *Codegen();
194 };
195
196 /// ForExprAST - Expression class for for/in.
197 class ForExprAST : public ExprAST {
198 std::string VarName;
199 ExprAST *Start, *End, *Step, *Body;
200
201 public:
ForExprAST(const std::string & varname,ExprAST * start,ExprAST * end,ExprAST * step,ExprAST * body)202 ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
203 ExprAST *step, ExprAST *body)
204 : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
205 virtual Value *Codegen();
206 };
207
208 /// VarExprAST - Expression class for var/in
209 class VarExprAST : public ExprAST {
210 std::vector<std::pair<std::string, ExprAST *> > VarNames;
211 ExprAST *Body;
212
213 public:
VarExprAST(const std::vector<std::pair<std::string,ExprAST * >> & varnames,ExprAST * body)214 VarExprAST(const std::vector<std::pair<std::string, ExprAST *> > &varnames,
215 ExprAST *body)
216 : VarNames(varnames), Body(body) {}
217
218 virtual Value *Codegen();
219 };
220
221 /// PrototypeAST - This class represents the "prototype" for a function,
222 /// which captures its argument names as well as if it is an operator.
223 class PrototypeAST {
224 std::string Name;
225 std::vector<std::string> Args;
226 bool isOperator;
227 unsigned Precedence; // Precedence if a binary op.
228 public:
PrototypeAST(const std::string & name,const std::vector<std::string> & args,bool isoperator=false,unsigned prec=0)229 PrototypeAST(const std::string &name, const std::vector<std::string> &args,
230 bool isoperator = false, unsigned prec = 0)
231 : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
232
isUnaryOp() const233 bool isUnaryOp() const { return isOperator && Args.size() == 1; }
isBinaryOp() const234 bool isBinaryOp() const { return isOperator && Args.size() == 2; }
235
getOperatorName() const236 char getOperatorName() const {
237 assert(isUnaryOp() || isBinaryOp());
238 return Name[Name.size() - 1];
239 }
240
getBinaryPrecedence() const241 unsigned getBinaryPrecedence() const { return Precedence; }
242
243 Function *Codegen();
244
245 void CreateArgumentAllocas(Function *F);
246 };
247
248 /// FunctionAST - This class represents a function definition itself.
249 class FunctionAST {
250 PrototypeAST *Proto;
251 ExprAST *Body;
252
253 public:
FunctionAST(PrototypeAST * proto,ExprAST * body)254 FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {}
255
256 Function *Codegen();
257 };
258 } // end anonymous namespace
259
//===----------------------------------------------------------------------===//
// Parser
//===----------------------------------------------------------------------===//
263
264 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
265 /// token the parser is looking at. getNextToken reads another token from the
266 /// lexer and updates CurTok with its results.
267 static int CurTok;
getNextToken()268 static int getNextToken() { return CurTok = gettok(); }
269
/// BinopPrecedence - Maps each defined binary operator character to its
/// precedence.
static std::map<char, int> BinopPrecedence;
273
274 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
GetTokPrecedence()275 static int GetTokPrecedence() {
276 if (!isascii(CurTok))
277 return -1;
278
279 // Make sure it's a declared binop.
280 int TokPrec = BinopPrecedence[CurTok];
281 if (TokPrec <= 0)
282 return -1;
283 return TokPrec;
284 }
285
286 /// Error* - These are little helper functions for error handling.
Error(const char * Str)287 ExprAST *Error(const char *Str) {
288 fprintf(stderr, "Error: %s\n", Str);
289 return 0;
290 }
ErrorP(const char * Str)291 PrototypeAST *ErrorP(const char *Str) {
292 Error(Str);
293 return 0;
294 }
ErrorF(const char * Str)295 FunctionAST *ErrorF(const char *Str) {
296 Error(Str);
297 return 0;
298 }
299
300 static ExprAST *ParseExpression();
301
302 /// identifierexpr
303 /// ::= identifier
304 /// ::= identifier '(' expression* ')'
ParseIdentifierExpr()305 static ExprAST *ParseIdentifierExpr() {
306 std::string IdName = IdentifierStr;
307
308 getNextToken(); // eat identifier.
309
310 if (CurTok != '(') // Simple variable ref.
311 return new VariableExprAST(IdName);
312
313 // Call.
314 getNextToken(); // eat (
315 std::vector<ExprAST *> Args;
316 if (CurTok != ')') {
317 while (1) {
318 ExprAST *Arg = ParseExpression();
319 if (!Arg)
320 return 0;
321 Args.push_back(Arg);
322
323 if (CurTok == ')')
324 break;
325
326 if (CurTok != ',')
327 return Error("Expected ')' or ',' in argument list");
328 getNextToken();
329 }
330 }
331
332 // Eat the ')'.
333 getNextToken();
334
335 return new CallExprAST(IdName, Args);
336 }
337
338 /// numberexpr ::= number
ParseNumberExpr()339 static ExprAST *ParseNumberExpr() {
340 ExprAST *Result = new NumberExprAST(NumVal);
341 getNextToken(); // consume the number
342 return Result;
343 }
344
345 /// parenexpr ::= '(' expression ')'
ParseParenExpr()346 static ExprAST *ParseParenExpr() {
347 getNextToken(); // eat (.
348 ExprAST *V = ParseExpression();
349 if (!V)
350 return 0;
351
352 if (CurTok != ')')
353 return Error("expected ')'");
354 getNextToken(); // eat ).
355 return V;
356 }
357
358 /// ifexpr ::= 'if' expression 'then' expression 'else' expression
ParseIfExpr()359 static ExprAST *ParseIfExpr() {
360 getNextToken(); // eat the if.
361
362 // condition.
363 ExprAST *Cond = ParseExpression();
364 if (!Cond)
365 return 0;
366
367 if (CurTok != tok_then)
368 return Error("expected then");
369 getNextToken(); // eat the then
370
371 ExprAST *Then = ParseExpression();
372 if (Then == 0)
373 return 0;
374
375 if (CurTok != tok_else)
376 return Error("expected else");
377
378 getNextToken();
379
380 ExprAST *Else = ParseExpression();
381 if (!Else)
382 return 0;
383
384 return new IfExprAST(Cond, Then, Else);
385 }
386
387 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
ParseForExpr()388 static ExprAST *ParseForExpr() {
389 getNextToken(); // eat the for.
390
391 if (CurTok != tok_identifier)
392 return Error("expected identifier after for");
393
394 std::string IdName = IdentifierStr;
395 getNextToken(); // eat identifier.
396
397 if (CurTok != '=')
398 return Error("expected '=' after for");
399 getNextToken(); // eat '='.
400
401 ExprAST *Start = ParseExpression();
402 if (Start == 0)
403 return 0;
404 if (CurTok != ',')
405 return Error("expected ',' after for start value");
406 getNextToken();
407
408 ExprAST *End = ParseExpression();
409 if (End == 0)
410 return 0;
411
412 // The step value is optional.
413 ExprAST *Step = 0;
414 if (CurTok == ',') {
415 getNextToken();
416 Step = ParseExpression();
417 if (Step == 0)
418 return 0;
419 }
420
421 if (CurTok != tok_in)
422 return Error("expected 'in' after for");
423 getNextToken(); // eat 'in'.
424
425 ExprAST *Body = ParseExpression();
426 if (Body == 0)
427 return 0;
428
429 return new ForExprAST(IdName, Start, End, Step, Body);
430 }
431
432 /// varexpr ::= 'var' identifier ('=' expression)?
433 // (',' identifier ('=' expression)?)* 'in' expression
ParseVarExpr()434 static ExprAST *ParseVarExpr() {
435 getNextToken(); // eat the var.
436
437 std::vector<std::pair<std::string, ExprAST *> > VarNames;
438
439 // At least one variable name is required.
440 if (CurTok != tok_identifier)
441 return Error("expected identifier after var");
442
443 while (1) {
444 std::string Name = IdentifierStr;
445 getNextToken(); // eat identifier.
446
447 // Read the optional initializer.
448 ExprAST *Init = 0;
449 if (CurTok == '=') {
450 getNextToken(); // eat the '='.
451
452 Init = ParseExpression();
453 if (Init == 0)
454 return 0;
455 }
456
457 VarNames.push_back(std::make_pair(Name, Init));
458
459 // End of var list, exit loop.
460 if (CurTok != ',')
461 break;
462 getNextToken(); // eat the ','.
463
464 if (CurTok != tok_identifier)
465 return Error("expected identifier list after var");
466 }
467
468 // At this point, we have to have 'in'.
469 if (CurTok != tok_in)
470 return Error("expected 'in' keyword after 'var'");
471 getNextToken(); // eat 'in'.
472
473 ExprAST *Body = ParseExpression();
474 if (Body == 0)
475 return 0;
476
477 return new VarExprAST(VarNames, Body);
478 }
479
480 /// primary
481 /// ::= identifierexpr
482 /// ::= numberexpr
483 /// ::= parenexpr
484 /// ::= ifexpr
485 /// ::= forexpr
486 /// ::= varexpr
ParsePrimary()487 static ExprAST *ParsePrimary() {
488 switch (CurTok) {
489 default:
490 return Error("unknown token when expecting an expression");
491 case tok_identifier:
492 return ParseIdentifierExpr();
493 case tok_number:
494 return ParseNumberExpr();
495 case '(':
496 return ParseParenExpr();
497 case tok_if:
498 return ParseIfExpr();
499 case tok_for:
500 return ParseForExpr();
501 case tok_var:
502 return ParseVarExpr();
503 }
504 }
505
506 /// unary
507 /// ::= primary
508 /// ::= '!' unary
ParseUnary()509 static ExprAST *ParseUnary() {
510 // If the current token is not an operator, it must be a primary expr.
511 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
512 return ParsePrimary();
513
514 // If this is a unary operator, read it.
515 int Opc = CurTok;
516 getNextToken();
517 if (ExprAST *Operand = ParseUnary())
518 return new UnaryExprAST(Opc, Operand);
519 return 0;
520 }
521
522 /// binoprhs
523 /// ::= ('+' unary)*
ParseBinOpRHS(int ExprPrec,ExprAST * LHS)524 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
525 // If this is a binop, find its precedence.
526 while (1) {
527 int TokPrec = GetTokPrecedence();
528
529 // If this is a binop that binds at least as tightly as the current binop,
530 // consume it, otherwise we are done.
531 if (TokPrec < ExprPrec)
532 return LHS;
533
534 // Okay, we know this is a binop.
535 int BinOp = CurTok;
536 getNextToken(); // eat binop
537
538 // Parse the unary expression after the binary operator.
539 ExprAST *RHS = ParseUnary();
540 if (!RHS)
541 return 0;
542
543 // If BinOp binds less tightly with RHS than the operator after RHS, let
544 // the pending operator take RHS as its LHS.
545 int NextPrec = GetTokPrecedence();
546 if (TokPrec < NextPrec) {
547 RHS = ParseBinOpRHS(TokPrec + 1, RHS);
548 if (RHS == 0)
549 return 0;
550 }
551
552 // Merge LHS/RHS.
553 LHS = new BinaryExprAST(BinOp, LHS, RHS);
554 }
555 }
556
557 /// expression
558 /// ::= unary binoprhs
559 ///
ParseExpression()560 static ExprAST *ParseExpression() {
561 ExprAST *LHS = ParseUnary();
562 if (!LHS)
563 return 0;
564
565 return ParseBinOpRHS(0, LHS);
566 }
567
568 /// prototype
569 /// ::= id '(' id* ')'
570 /// ::= binary LETTER number? (id, id)
571 /// ::= unary LETTER (id)
ParsePrototype()572 static PrototypeAST *ParsePrototype() {
573 std::string FnName;
574
575 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
576 unsigned BinaryPrecedence = 30;
577
578 switch (CurTok) {
579 default:
580 return ErrorP("Expected function name in prototype");
581 case tok_identifier:
582 FnName = IdentifierStr;
583 Kind = 0;
584 getNextToken();
585 break;
586 case tok_unary:
587 getNextToken();
588 if (!isascii(CurTok))
589 return ErrorP("Expected unary operator");
590 FnName = "unary";
591 FnName += (char)CurTok;
592 Kind = 1;
593 getNextToken();
594 break;
595 case tok_binary:
596 getNextToken();
597 if (!isascii(CurTok))
598 return ErrorP("Expected binary operator");
599 FnName = "binary";
600 FnName += (char)CurTok;
601 Kind = 2;
602 getNextToken();
603
604 // Read the precedence if present.
605 if (CurTok == tok_number) {
606 if (NumVal < 1 || NumVal > 100)
607 return ErrorP("Invalid precedecnce: must be 1..100");
608 BinaryPrecedence = (unsigned)NumVal;
609 getNextToken();
610 }
611 break;
612 }
613
614 if (CurTok != '(')
615 return ErrorP("Expected '(' in prototype");
616
617 std::vector<std::string> ArgNames;
618 while (getNextToken() == tok_identifier)
619 ArgNames.push_back(IdentifierStr);
620 if (CurTok != ')')
621 return ErrorP("Expected ')' in prototype");
622
623 // success.
624 getNextToken(); // eat ')'.
625
626 // Verify right number of names for operator.
627 if (Kind && ArgNames.size() != Kind)
628 return ErrorP("Invalid number of operands for operator");
629
630 return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
631 }
632
633 /// definition ::= 'def' prototype expression
ParseDefinition()634 static FunctionAST *ParseDefinition() {
635 getNextToken(); // eat def.
636 PrototypeAST *Proto = ParsePrototype();
637 if (Proto == 0)
638 return 0;
639
640 if (ExprAST *E = ParseExpression())
641 return new FunctionAST(Proto, E);
642 return 0;
643 }
644
645 /// toplevelexpr ::= expression
ParseTopLevelExpr()646 static FunctionAST *ParseTopLevelExpr() {
647 if (ExprAST *E = ParseExpression()) {
648 // Make an anonymous proto.
649 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
650 return new FunctionAST(Proto, E);
651 }
652 return 0;
653 }
654
655 /// external ::= 'extern' prototype
ParseExtern()656 static PrototypeAST *ParseExtern() {
657 getNextToken(); // eat extern.
658 return ParsePrototype();
659 }
660
//===----------------------------------------------------------------------===//
// Code Generation
//===----------------------------------------------------------------------===//
664
665 static Module *TheModule;
666 static IRBuilder<> Builder(getGlobalContext());
667 static std::map<std::string, AllocaInst *> NamedValues;
668 static FunctionPassManager *TheFPM;
669
ErrorV(const char * Str)670 Value *ErrorV(const char *Str) {
671 Error(Str);
672 return 0;
673 }
674
675 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
676 /// the function. This is used for mutable variables etc.
CreateEntryBlockAlloca(Function * TheFunction,const std::string & VarName)677 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
678 const std::string &VarName) {
679 IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
680 TheFunction->getEntryBlock().begin());
681 return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
682 VarName.c_str());
683 }
684
Codegen()685 Value *NumberExprAST::Codegen() {
686 return ConstantFP::get(getGlobalContext(), APFloat(Val));
687 }
688
Codegen()689 Value *VariableExprAST::Codegen() {
690 // Look this variable up in the function.
691 Value *V = NamedValues[Name];
692 if (V == 0)
693 return ErrorV("Unknown variable name");
694
695 // Load the value.
696 return Builder.CreateLoad(V, Name.c_str());
697 }
698
Codegen()699 Value *UnaryExprAST::Codegen() {
700 Value *OperandV = Operand->Codegen();
701 if (OperandV == 0)
702 return 0;
703
704 Function *F = TheModule->getFunction(std::string("unary") + Opcode);
705 if (F == 0)
706 return ErrorV("Unknown unary operator");
707
708 return Builder.CreateCall(F, OperandV, "unop");
709 }
710
Codegen()711 Value *BinaryExprAST::Codegen() {
712 // Special case '=' because we don't want to emit the LHS as an expression.
713 if (Op == '=') {
714 // Assignment requires the LHS to be an identifier.
715 VariableExprAST *LHSE = dynamic_cast<VariableExprAST *>(LHS);
716 if (!LHSE)
717 return ErrorV("destination of '=' must be a variable");
718 // Codegen the RHS.
719 Value *Val = RHS->Codegen();
720 if (Val == 0)
721 return 0;
722
723 // Look up the name.
724 Value *Variable = NamedValues[LHSE->getName()];
725 if (Variable == 0)
726 return ErrorV("Unknown variable name");
727
728 Builder.CreateStore(Val, Variable);
729 return Val;
730 }
731
732 Value *L = LHS->Codegen();
733 Value *R = RHS->Codegen();
734 if (L == 0 || R == 0)
735 return 0;
736
737 switch (Op) {
738 case '+':
739 return Builder.CreateFAdd(L, R, "addtmp");
740 case '-':
741 return Builder.CreateFSub(L, R, "subtmp");
742 case '*':
743 return Builder.CreateFMul(L, R, "multmp");
744 case '<':
745 L = Builder.CreateFCmpULT(L, R, "cmptmp");
746 // Convert bool 0/1 to double 0.0 or 1.0
747 return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
748 "booltmp");
749 default:
750 break;
751 }
752
753 // If it wasn't a builtin binary operator, it must be a user defined one. Emit
754 // a call to it.
755 Function *F = TheModule->getFunction(std::string("binary") + Op);
756 assert(F && "binary operator not found!");
757
758 Value *Ops[] = { L, R };
759 return Builder.CreateCall(F, Ops, "binop");
760 }
761
Codegen()762 Value *CallExprAST::Codegen() {
763 // Look up the name in the global module table.
764 Function *CalleeF = TheModule->getFunction(Callee);
765 if (CalleeF == 0)
766 return ErrorV("Unknown function referenced");
767
768 // If argument mismatch error.
769 if (CalleeF->arg_size() != Args.size())
770 return ErrorV("Incorrect # arguments passed");
771
772 std::vector<Value *> ArgsV;
773 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
774 ArgsV.push_back(Args[i]->Codegen());
775 if (ArgsV.back() == 0)
776 return 0;
777 }
778
779 return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
780 }
781
Codegen()782 Value *IfExprAST::Codegen() {
783 Value *CondV = Cond->Codegen();
784 if (CondV == 0)
785 return 0;
786
787 // Convert condition to a bool by comparing equal to 0.0.
788 CondV = Builder.CreateFCmpONE(
789 CondV, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "ifcond");
790
791 Function *TheFunction = Builder.GetInsertBlock()->getParent();
792
793 // Create blocks for the then and else cases. Insert the 'then' block at the
794 // end of the function.
795 BasicBlock *ThenBB =
796 BasicBlock::Create(getGlobalContext(), "then", TheFunction);
797 BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
798 BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
799
800 Builder.CreateCondBr(CondV, ThenBB, ElseBB);
801
802 // Emit then value.
803 Builder.SetInsertPoint(ThenBB);
804
805 Value *ThenV = Then->Codegen();
806 if (ThenV == 0)
807 return 0;
808
809 Builder.CreateBr(MergeBB);
810 // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
811 ThenBB = Builder.GetInsertBlock();
812
813 // Emit else block.
814 TheFunction->getBasicBlockList().push_back(ElseBB);
815 Builder.SetInsertPoint(ElseBB);
816
817 Value *ElseV = Else->Codegen();
818 if (ElseV == 0)
819 return 0;
820
821 Builder.CreateBr(MergeBB);
822 // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
823 ElseBB = Builder.GetInsertBlock();
824
825 // Emit merge block.
826 TheFunction->getBasicBlockList().push_back(MergeBB);
827 Builder.SetInsertPoint(MergeBB);
828 PHINode *PN =
829 Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp");
830
831 PN->addIncoming(ThenV, ThenBB);
832 PN->addIncoming(ElseV, ElseBB);
833 return PN;
834 }
835
Codegen()836 Value *ForExprAST::Codegen() {
837 // Output this as:
838 // var = alloca double
839 // ...
840 // start = startexpr
841 // store start -> var
842 // goto loop
843 // loop:
844 // ...
845 // bodyexpr
846 // ...
847 // loopend:
848 // step = stepexpr
849 // endcond = endexpr
850 //
851 // curvar = load var
852 // nextvar = curvar + step
853 // store nextvar -> var
854 // br endcond, loop, endloop
855 // outloop:
856
857 Function *TheFunction = Builder.GetInsertBlock()->getParent();
858
859 // Create an alloca for the variable in the entry block.
860 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
861
862 // Emit the start code first, without 'variable' in scope.
863 Value *StartVal = Start->Codegen();
864 if (StartVal == 0)
865 return 0;
866
867 // Store the value into the alloca.
868 Builder.CreateStore(StartVal, Alloca);
869
870 // Make the new basic block for the loop header, inserting after current
871 // block.
872 BasicBlock *LoopBB =
873 BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
874
875 // Insert an explicit fall through from the current block to the LoopBB.
876 Builder.CreateBr(LoopBB);
877
878 // Start insertion in LoopBB.
879 Builder.SetInsertPoint(LoopBB);
880
881 // Within the loop, the variable is defined equal to the PHI node. If it
882 // shadows an existing variable, we have to restore it, so save it now.
883 AllocaInst *OldVal = NamedValues[VarName];
884 NamedValues[VarName] = Alloca;
885
886 // Emit the body of the loop. This, like any other expr, can change the
887 // current BB. Note that we ignore the value computed by the body, but don't
888 // allow an error.
889 if (Body->Codegen() == 0)
890 return 0;
891
892 // Emit the step value.
893 Value *StepVal;
894 if (Step) {
895 StepVal = Step->Codegen();
896 if (StepVal == 0)
897 return 0;
898 } else {
899 // If not specified, use 1.0.
900 StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
901 }
902
903 // Compute the end condition.
904 Value *EndCond = End->Codegen();
905 if (EndCond == 0)
906 return EndCond;
907
908 // Reload, increment, and restore the alloca. This handles the case where
909 // the body of the loop mutates the variable.
910 Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
911 Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
912 Builder.CreateStore(NextVar, Alloca);
913
914 // Convert condition to a bool by comparing equal to 0.0.
915 EndCond = Builder.CreateFCmpONE(
916 EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond");
917
918 // Create the "after loop" block and insert it.
919 BasicBlock *AfterBB =
920 BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
921
922 // Insert the conditional branch into the end of LoopEndBB.
923 Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
924
925 // Any new code will be inserted in AfterBB.
926 Builder.SetInsertPoint(AfterBB);
927
928 // Restore the unshadowed variable.
929 if (OldVal)
930 NamedValues[VarName] = OldVal;
931 else
932 NamedValues.erase(VarName);
933
934 // for expr always returns 0.0.
935 return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
936 }
937
Codegen()938 Value *VarExprAST::Codegen() {
939 std::vector<AllocaInst *> OldBindings;
940
941 Function *TheFunction = Builder.GetInsertBlock()->getParent();
942
943 // Register all variables and emit their initializer.
944 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
945 const std::string &VarName = VarNames[i].first;
946 ExprAST *Init = VarNames[i].second;
947
948 // Emit the initializer before adding the variable to scope, this prevents
949 // the initializer from referencing the variable itself, and permits stuff
950 // like this:
951 // var a = 1 in
952 // var a = a in ... # refers to outer 'a'.
953 Value *InitVal;
954 if (Init) {
955 InitVal = Init->Codegen();
956 if (InitVal == 0)
957 return 0;
958 } else { // If not specified, use 0.0.
959 InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
960 }
961
962 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
963 Builder.CreateStore(InitVal, Alloca);
964
965 // Remember the old variable binding so that we can restore the binding when
966 // we unrecurse.
967 OldBindings.push_back(NamedValues[VarName]);
968
969 // Remember this binding.
970 NamedValues[VarName] = Alloca;
971 }
972
973 // Codegen the body, now that all vars are in scope.
974 Value *BodyVal = Body->Codegen();
975 if (BodyVal == 0)
976 return 0;
977
978 // Pop all our variables from scope.
979 for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
980 NamedValues[VarNames[i].first] = OldBindings[i];
981
982 // Return the body computation.
983 return BodyVal;
984 }
985
Codegen()986 Function *PrototypeAST::Codegen() {
987 // Make the function type: double(double,double) etc.
988 std::vector<Type *> Doubles(Args.size(),
989 Type::getDoubleTy(getGlobalContext()));
990 FunctionType *FT =
991 FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
992
993 Function *F =
994 Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
995
996 // If F conflicted, there was already something named 'Name'. If it has a
997 // body, don't allow redefinition or reextern.
998 if (F->getName() != Name) {
999 // Delete the one we just made and get the existing one.
1000 F->eraseFromParent();
1001 F = TheModule->getFunction(Name);
1002
1003 // If F already has a body, reject this.
1004 if (!F->empty()) {
1005 ErrorF("redefinition of function");
1006 return 0;
1007 }
1008
1009 // If F took a different number of args, reject.
1010 if (F->arg_size() != Args.size()) {
1011 ErrorF("redefinition of function with different # args");
1012 return 0;
1013 }
1014 }
1015
1016 // Set names for all arguments.
1017 unsigned Idx = 0;
1018 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
1019 ++AI, ++Idx)
1020 AI->setName(Args[Idx]);
1021
1022 return F;
1023 }
1024
1025 /// CreateArgumentAllocas - Create an alloca for each argument and register the
1026 /// argument in the symbol table so that references to it will succeed.
CreateArgumentAllocas(Function * F)1027 void PrototypeAST::CreateArgumentAllocas(Function *F) {
1028 Function::arg_iterator AI = F->arg_begin();
1029 for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
1030 // Create an alloca for this variable.
1031 AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
1032
1033 // Store the initial value into the alloca.
1034 Builder.CreateStore(AI, Alloca);
1035
1036 // Add arguments to variable symbol table.
1037 NamedValues[Args[Idx]] = Alloca;
1038 }
1039 }
1040
Codegen()1041 Function *FunctionAST::Codegen() {
1042 NamedValues.clear();
1043
1044 Function *TheFunction = Proto->Codegen();
1045 if (TheFunction == 0)
1046 return 0;
1047
1048 // If this is an operator, install it.
1049 if (Proto->isBinaryOp())
1050 BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
1051
1052 // Create a new basic block to start insertion into.
1053 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
1054 Builder.SetInsertPoint(BB);
1055
1056 // Add all arguments to the symbol table and create their allocas.
1057 Proto->CreateArgumentAllocas(TheFunction);
1058
1059 if (Value *RetVal = Body->Codegen()) {
1060 // Finish off the function.
1061 Builder.CreateRet(RetVal);
1062
1063 // Validate the generated code, checking for consistency.
1064 verifyFunction(*TheFunction);
1065
1066 // Optimize the function.
1067 TheFPM->run(*TheFunction);
1068
1069 return TheFunction;
1070 }
1071
1072 // Error reading body, remove function.
1073 TheFunction->eraseFromParent();
1074
1075 if (Proto->isBinaryOp())
1076 BinopPrecedence.erase(Proto->getOperatorName());
1077 return 0;
1078 }
1079
//===----------------------------------------------------------------------===//
// Top-Level parsing and JIT Driver
//===----------------------------------------------------------------------===//
1083
1084 static ExecutionEngine *TheExecutionEngine;
1085
HandleDefinition()1086 static void HandleDefinition() {
1087 if (FunctionAST *F = ParseDefinition()) {
1088 if (Function *LF = F->Codegen()) {
1089 fprintf(stderr, "Read function definition:");
1090 LF->dump();
1091 }
1092 } else {
1093 // Skip token for error recovery.
1094 getNextToken();
1095 }
1096 }
1097
HandleExtern()1098 static void HandleExtern() {
1099 if (PrototypeAST *P = ParseExtern()) {
1100 if (Function *F = P->Codegen()) {
1101 fprintf(stderr, "Read extern: ");
1102 F->dump();
1103 }
1104 } else {
1105 // Skip token for error recovery.
1106 getNextToken();
1107 }
1108 }
1109
HandleTopLevelExpression()1110 static void HandleTopLevelExpression() {
1111 // Evaluate a top-level expression into an anonymous function.
1112 if (FunctionAST *F = ParseTopLevelExpr()) {
1113 if (Function *LF = F->Codegen()) {
1114 TheExecutionEngine->finalizeObject();
1115 // JIT the function, returning a function pointer.
1116 void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
1117
1118 // Cast it to the right type (takes no arguments, returns a double) so we
1119 // can call it as a native function.
1120 double (*FP)() = (double (*)())(intptr_t)FPtr;
1121 fprintf(stderr, "Evaluated to %f\n", FP());
1122 }
1123 } else {
1124 // Skip token for error recovery.
1125 getNextToken();
1126 }
1127 }
1128
1129 /// top ::= definition | external | expression | ';'
MainLoop()1130 static void MainLoop() {
1131 while (1) {
1132 fprintf(stderr, "ready> ");
1133 switch (CurTok) {
1134 case tok_eof:
1135 return;
1136 case ';':
1137 getNextToken();
1138 break; // ignore top-level semicolons.
1139 case tok_def:
1140 HandleDefinition();
1141 break;
1142 case tok_extern:
1143 HandleExtern();
1144 break;
1145 default:
1146 HandleTopLevelExpression();
1147 break;
1148 }
1149 }
1150 }
1151
1152 //===----------------------------------------------------------------------===//
1153 // "Library" functions that can be "extern'd" from user code.
1154 //===----------------------------------------------------------------------===//
1155
/// putchard - Write X, narrowed to a char, to stdout via putchar; always
/// returns 0.
extern "C" double putchard(double X) {
  const char Ch = static_cast<char>(X);
  putchar(Ch);
  return 0.0;
}
1161
/// printd - Print X to stdout using the "%f\n" format; always returns 0.
extern "C" double printd(double X) {
  fprintf(stdout, "%f\n", X);
  return 0.0;
}
1167
1168 //===----------------------------------------------------------------------===//
1169 // Main driver code.
1170 //===----------------------------------------------------------------------===//
1171
main()1172 int main() {
1173 InitializeNativeTarget();
1174 InitializeNativeTargetAsmPrinter();
1175 InitializeNativeTargetAsmParser();
1176 LLVMContext &Context = getGlobalContext();
1177
1178 // Install standard binary operators.
1179 // 1 is lowest precedence.
1180 BinopPrecedence['='] = 2;
1181 BinopPrecedence['<'] = 10;
1182 BinopPrecedence['+'] = 20;
1183 BinopPrecedence['-'] = 20;
1184 BinopPrecedence['*'] = 40; // highest.
1185
1186 // Prime the first token.
1187 fprintf(stderr, "ready> ");
1188 getNextToken();
1189
1190 // Make the module, which holds all the code.
1191 std::unique_ptr<Module> Owner = make_unique<Module>("my cool jit", Context);
1192 TheModule = Owner.get();
1193
1194 // Create the JIT. This takes ownership of the module.
1195 std::string ErrStr;
1196 TheExecutionEngine =
1197 EngineBuilder(std::move(Owner))
1198 .setErrorStr(&ErrStr)
1199 .setMCJITMemoryManager(llvm::make_unique<SectionMemoryManager>())
1200 .create();
1201 if (!TheExecutionEngine) {
1202 fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
1203 exit(1);
1204 }
1205
1206 FunctionPassManager OurFPM(TheModule);
1207
1208 // Set up the optimizer pipeline. Start with registering info about how the
1209 // target lays out data structures.
1210 TheModule->setDataLayout(TheExecutionEngine->getDataLayout());
1211 OurFPM.add(new DataLayoutPass());
1212 // Provide basic AliasAnalysis support for GVN.
1213 OurFPM.add(createBasicAliasAnalysisPass());
1214 // Promote allocas to registers.
1215 OurFPM.add(createPromoteMemoryToRegisterPass());
1216 // Do simple "peephole" optimizations and bit-twiddling optzns.
1217 OurFPM.add(createInstructionCombiningPass());
1218 // Reassociate expressions.
1219 OurFPM.add(createReassociatePass());
1220 // Eliminate Common SubExpressions.
1221 OurFPM.add(createGVNPass());
1222 // Simplify the control flow graph (deleting unreachable blocks, etc).
1223 OurFPM.add(createCFGSimplificationPass());
1224
1225 OurFPM.doInitialization();
1226
1227 // Set the global so the code gen can use this.
1228 TheFPM = &OurFPM;
1229
1230 // Run the main "interpreter loop" now.
1231 MainLoop();
1232
1233 TheFPM = 0;
1234
1235 // Print out all of the generated code.
1236 TheModule->dump();
1237
1238 return 0;
1239 }
1240