compiler.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. /**
  2. ******************************************************************************
  3. * @file : compiler.c
  4. * @author : simon
  5. * @brief : None
  6. * @attention : None
  7. * @date : 2023/8/17
  8. ******************************************************************************
  9. */
  10. #include "compiler.h"
  11. #include "common.h"
  12. #include "scanner.h"
  13. #ifdef DEBUG_PRINT_CODE
  14. #include "debug.h"
  15. #endif
  16. typedef struct {
  17. Token current;
  18. Token previous;
  19. bool hadError;
  20. bool panicMode;
  21. } Parser;
  22. typedef enum {
  23. PREC_NONE,
  24. PREC_ASSIGNMENT,// =
  25. PREC_OR, // or
  26. PREC_AND, // and
  27. PREC_EQUALITY, // == !=
  28. PREC_COMPARISON,// < > <= >=
  29. PREC_TERM, // + -
  30. PREC_FACTOR, // * /
  31. PREC_UNARY, // ! -
  32. PREC_CALL, // . ()
  33. PREC_PRIMARY
  34. } Precedence;
  35. typedef void (*ParseFn)(bool canAssign);
  36. typedef struct {
  37. ParseFn prefix;
  38. ParseFn infix;
  39. Precedence precedence;
  40. } ParseRule;
  41. typedef struct {
  42. Token name;
  43. int depth;
  44. } Local;
  45. typedef struct {
  46. Local locals[UINT8_COUNT];
  47. int localCount;/// how many locals are in scope
  48. int scopeDepth;
  49. } Compiler;
  50. Parser parser;
  51. Compiler *current = NULL;
  52. Chunk *compilingChunk;
  53. static void parsePrecedence(Precedence);
  54. static uint8_t parseVariable(const char *);
  55. static void defineVariable(uint8_t);
  56. static uint8_t identifierConstant(Token *name);
  57. static bool identifiersEqual(Token *a, Token *b);
  58. static ParseRule *getRule(TokenType);
  59. static void statement();
  60. static void declaration();
  61. static void beginScope();
  62. static void endScope();
  63. static Chunk *currentChunk() {
  64. return compilingChunk;
  65. }
  66. static void errorAt(Token *token, const char *message) {
  67. if (parser.panicMode) return;
  68. parser.panicMode = true;
  69. fprintf(stderr, "[line %d] Error", token->line);
  70. if (token->type == TOKEN_EOF) {
  71. fprintf(stderr, " at end");
  72. } else if (token->type == TOKEN_ERROR) {
  73. ///! Nothing.
  74. } else {
  75. fprintf(stderr, " at '%.*s'", token->length, token->start);
  76. }
  77. fprintf(stderr, ": %s\n", message);
  78. parser.hadError = true;
  79. }
  80. static void error(const char *message) {
  81. errorAt(&parser.previous, message);
  82. }
  83. static void errorAtCurrent(const char *message) {
  84. errorAt(&parser.current, message);
  85. }
  86. static void advance() {
  87. parser.previous = parser.current;
  88. for (;;) {
  89. parser.current = scanToken();
  90. if (parser.current.type != TOKEN_ERROR) break;
  91. errorAtCurrent(parser.current.start);
  92. }
  93. }
  94. void consume(TokenType type, const char *message) {
  95. if (parser.current.type == type) {
  96. advance();
  97. return;
  98. }
  99. errorAtCurrent(message);
  100. }
  101. static bool check(TokenType type) {
  102. return parser.current.type == type;
  103. }
  104. static bool match(TokenType type) {
  105. if (!check(type)) return false;
  106. advance();
  107. return true;
  108. }
  109. static void emitByte(uint8_t byte) {
  110. writeChunk(currentChunk(), byte, parser.previous.line);
  111. }
  112. static void emitBytes(uint8_t byte1, uint8_t byte2) {
  113. emitByte(byte1);
  114. emitByte(byte2);
  115. }
  116. static void emitReturn() {
  117. emitByte(OP_RETURN);
  118. }
  119. static uint8_t makeConstant(Value value) {
  120. int constant = addConstant(currentChunk(), value);
  121. if (constant > UINT8_MAX) {
  122. error("Too many constants in one chunk.");
  123. return 0;
  124. }
  125. return (uint8_t) constant;
  126. }
  127. static void expression() {
  128. parsePrecedence(PREC_ASSIGNMENT);
  129. }
  130. /// for example: var a; var b = 10;
  131. static void varDeclaration() {
  132. uint8_t global = parseVariable("Expect variable name.");
  133. // 变量初始化
  134. if (match(TOKEN_EQUAL)) {
  135. expression();
  136. } else {
  137. emitByte(OP_NIL);
  138. }
  139. consume(TOKEN_SEMICOLON, "Expect ';' after variable declaration.");
  140. defineVariable(global);
  141. }
  142. static void expressionStatement() {
  143. expression();
  144. consume(TOKEN_SEMICOLON, "Expect ';' after expression.");
  145. emitByte(OP_POP);
  146. }
  147. static void printStatement() {
  148. expression();
  149. consume(TOKEN_SEMICOLON, "Expect ';' after value.");
  150. emitByte(OP_PRINT);
  151. }
  152. static void synchronize() {
  153. parser.panicMode = false;
  154. while (parser.current.type != TOKEN_EOF) {
  155. if (parser.previous.type == TOKEN_SEMICOLON) return;
  156. switch (parser.current.type) {
  157. case TOKEN_CLASS:
  158. case TOKEN_FUN:
  159. case TOKEN_VAR:
  160. case TOKEN_FOR:
  161. case TOKEN_IF:
  162. case TOKEN_WHILE:
  163. case TOKEN_PRINT:
  164. case TOKEN_RETURN:
  165. return;
  166. default:;// Do nothing.
  167. }
  168. advance();
  169. }
  170. }
  171. ///declaration → classDecl
  172. /// | funDecl
  173. /// | varDecl
  174. /// | statement ;
  175. static void declaration() {
  176. if (match(TOKEN_VAR)) {
  177. varDeclaration();
  178. } else {
  179. statement();
  180. }
  181. if (parser.panicMode) synchronize();
  182. }
  183. static void block() {
  184. while (!check(TOKEN_RIGHT_BRACE) && !check(TOKEN_EOF)) {
  185. declaration();
  186. }
  187. consume(TOKEN_RIGHT_BRACE, "Expect '}' after block.");
  188. }
  189. ///statement → exprStmt
  190. /// | forStmt
  191. /// | ifStmt
  192. /// | printStmt
  193. /// | returnStmt
  194. /// | whileStmt
  195. /// | block ;
  196. static void statement() {
  197. if (match(TOKEN_PRINT)) {
  198. printStatement();
  199. } else if (match(TOKEN_LEFT_BRACE)) {
  200. ///! block → "{" declaration* "}" ;
  201. beginScope();
  202. block();
  203. endScope();
  204. } else {
  205. expressionStatement();
  206. }
  207. }
  208. static void beginScope() {
  209. current->scopeDepth++;
  210. }
  211. static void endScope() {
  212. current->scopeDepth--;
  213. while (current->localCount > 0
  214. && current->locals[current->localCount - 1].depth > current->scopeDepth) {
  215. emitByte(OP_POP);
  216. current->localCount--;
  217. }
  218. }
  219. static void emitConstant(Value value) {
  220. emitBytes(OP_CONSTANT, makeConstant(value));
  221. }
  222. static void initCompiler(Compiler *compiler) {
  223. compiler->localCount = 0;
  224. compiler->scopeDepth = 0;
  225. current = compiler;
  226. }
  227. static void endCompiler() {
  228. emitReturn();
  229. #ifdef DEBUG_PRINT_CODE
  230. if (!parser.hadError) {
  231. disassembleChunk(currentChunk(), "code");
  232. }
  233. #endif
  234. }
  235. static void grouping(__attribute__((unused)) bool canAssign) {
  236. expression();
  237. consume(TOKEN_RIGHT_PAREN, "Expect ')' after expression.");
  238. }
  239. /// \brief parse number token
  240. /// Number literals: 123
  241. static void number(__attribute__((unused)) bool canAssign) {
  242. double value = strtod(parser.previous.start, NULL);
  243. emitConstant(NUMBER_VAL(value));
  244. }
  245. static void string(__attribute__((unused)) bool canAssign) {
  246. emitConstant(OBJ_VAL(copyString(parser.previous.start + 1,
  247. parser.previous.length - 2)));
  248. }
  249. static int resolveLocal(Compiler *compile, Token *name) {
  250. for (int i = compile->localCount - 1; i >= 0; i--) {
  251. Local *local = &compile->locals[i];
  252. if (identifiersEqual(name, &local->name)) {
  253. if (local->depth == -1) {
  254. error("Can't read local variable in ints own initializer.");
  255. }
  256. return i;
  257. }
  258. }
  259. return -1;
  260. }
  261. static void namedVariable(Token name, bool canAssign) {
  262. uint8_t getOp, setOp;
  263. int arg = resolveLocal(current, &name);
  264. if (arg != -1) {
  265. getOp = OP_GET_LOCAL;
  266. setOp = OP_SET_LOCAL;
  267. } else {
  268. arg = identifierConstant(&name);
  269. getOp = OP_GET_GLOBAL;
  270. setOp = OP_SET_GLOBAL;
  271. }
  272. if (canAssign && match(TOKEN_EQUAL)) {
  273. // 如 menu.brunch(sunday).beverage = "mimosa";
  274. expression();
  275. emitBytes(setOp, (uint8_t) arg);
  276. } else {
  277. emitBytes(getOp, (uint8_t) arg);
  278. }
  279. }
  280. static void variable(bool canAssign) {
  281. namedVariable(parser.previous, canAssign);
  282. }
  283. /// Unary negation: -123
  284. static void unary(__attribute__((unused)) bool canAssign) {
  285. TokenType operatorType = parser.previous.type;
  286. // Compile the operand.
  287. parsePrecedence(PREC_UNARY);
  288. // Emit the operator instruction.
  289. switch (operatorType) {
  290. case TOKEN_BANG:
  291. emitByte(OP_NOT);
  292. break;
  293. case TOKEN_MINUS:
  294. emitByte(OP_NEGATE);
  295. break;
  296. default: return;// Unreachable.
  297. }
  298. }
  299. /// \brief infix parser
  300. static void binary(__attribute__((unused)) bool canAssign) {
  301. TokenType operatorType = parser.previous.type;
  302. ParseRule *rule = getRule(operatorType);
  303. parsePrecedence((Precedence) rule->precedence + 1);
  304. switch (operatorType) {
  305. case TOKEN_BANG_EQUAL:
  306. emitBytes(OP_EQUAL, OP_NOT);
  307. break;
  308. case TOKEN_EQUAL_EQUAL:
  309. emitByte(OP_EQUAL);
  310. break;
  311. case TOKEN_GREATER:
  312. emitByte(OP_GREATER);
  313. break;
  314. case TOKEN_GREATER_EQUAL:
  315. emitBytes(OP_LESS, OP_NOT);
  316. break;
  317. case TOKEN_LESS:
  318. emitByte(OP_LESS);
  319. break;
  320. case TOKEN_LESS_EQUAL:
  321. emitBytes(OP_GREATER, OP_NOT);
  322. break;
  323. case TOKEN_PLUS:
  324. emitByte(OP_ADD);
  325. break;
  326. case TOKEN_MINUS:
  327. emitByte(OP_SUBTRACT);
  328. break;
  329. case TOKEN_STAR:
  330. emitByte(OP_MULTIPLY);
  331. break;
  332. case TOKEN_SLASH:
  333. emitByte(OP_DIVIDE);
  334. break;
  335. default: return;// Unreachable.
  336. }
  337. }
  338. static void literal(__attribute__((unused)) bool canAssign) {
  339. switch (parser.previous.type) {
  340. case TOKEN_FALSE:
  341. emitByte(OP_FALSE);
  342. break;
  343. case TOKEN_NIL:
  344. emitByte(OP_NIL);
  345. break;
  346. case TOKEN_TRUE:
  347. emitByte(OP_TRUE);
  348. break;
  349. default: return;// Unreachable
  350. }
  351. }
  352. ParseRule rules[] = {
  353. [TOKEN_LEFT_PAREN] = {grouping, NULL, PREC_NONE},
  354. [TOKEN_RIGHT_PAREN] = {NULL, NULL, PREC_NONE},
  355. [TOKEN_LEFT_BRACE] = {NULL, NULL, PREC_NONE},
  356. [TOKEN_RIGHT_BRACE] = {NULL, NULL, PREC_NONE},
  357. [TOKEN_COMMA] = {NULL, NULL, PREC_NONE},
  358. [TOKEN_DOT] = {NULL, NULL, PREC_NONE},
  359. [TOKEN_MINUS] = {unary, binary, PREC_TERM},
  360. [TOKEN_PLUS] = {NULL, binary, PREC_TERM},
  361. [TOKEN_SEMICOLON] = {NULL, NULL, PREC_NONE},
  362. [TOKEN_SLASH] = {NULL, binary, PREC_FACTOR},
  363. [TOKEN_STAR] = {NULL, binary, PREC_FACTOR},
  364. [TOKEN_BANG] = {unary, NULL, PREC_NONE},
  365. [TOKEN_BANG_EQUAL] = {NULL, binary, PREC_EQUALITY},
  366. [TOKEN_EQUAL] = {NULL, NULL, PREC_NONE},
  367. [TOKEN_EQUAL_EQUAL] = {NULL, binary, PREC_EQUALITY},
  368. [TOKEN_GREATER] = {NULL, binary, PREC_COMPARISON},
  369. [TOKEN_GREATER_EQUAL] = {NULL, binary, PREC_COMPARISON},
  370. [TOKEN_LESS] = {NULL, binary, PREC_COMPARISON},
  371. [TOKEN_LESS_EQUAL] = {NULL, binary, PREC_COMPARISON},
  372. [TOKEN_IDENTIFIER] = {variable, NULL, PREC_NONE},
  373. [TOKEN_STRING] = {string, NULL, PREC_NONE},
  374. [TOKEN_NUMBER] = {number, NULL, PREC_NONE},
  375. [TOKEN_AND] = {NULL, NULL, PREC_NONE},
  376. [TOKEN_CLASS] = {NULL, NULL, PREC_NONE},
  377. [TOKEN_ELSE] = {NULL, NULL, PREC_NONE},
  378. [TOKEN_FALSE] = {literal, NULL, PREC_NONE},
  379. [TOKEN_FOR] = {NULL, NULL, PREC_NONE},
  380. [TOKEN_FUN] = {NULL, NULL, PREC_NONE},
  381. [TOKEN_IF] = {NULL, NULL, PREC_NONE},
  382. [TOKEN_NIL] = {literal, NULL, PREC_NONE},
  383. [TOKEN_OR] = {NULL, NULL, PREC_NONE},
  384. [TOKEN_PRINT] = {NULL, NULL, PREC_NONE},
  385. [TOKEN_RETURN] = {NULL, NULL, PREC_NONE},
  386. [TOKEN_SUPER] = {NULL, NULL, PREC_NONE},
  387. [TOKEN_THIS] = {NULL, NULL, PREC_NONE},
  388. [TOKEN_TRUE] = {literal, NULL, PREC_NONE},
  389. [TOKEN_VAR] = {NULL, NULL, PREC_NONE},
  390. [TOKEN_WHILE] = {NULL, NULL, PREC_NONE},
  391. [TOKEN_ERROR] = {NULL, NULL, PREC_NONE},
  392. [TOKEN_EOF] = {NULL, NULL, PREC_NONE},
  393. };
  394. /// \brief 优先级处理
  395. /// \param precedence
  396. static void parsePrecedence(Precedence precedence) {
  397. advance();
  398. ParseFn prefixRule = getRule(parser.previous.type)->prefix;
  399. if (prefixRule == NULL) {
  400. error("Expect expression.");
  401. return;
  402. }
  403. bool canAssign = precedence <= PREC_ASSIGNMENT;
  404. prefixRule(canAssign);///! 执行具体函数
  405. while (precedence <= getRule(parser.current.type)->precedence) {
  406. advance();
  407. ParseFn infixRule = getRule(parser.previous.type)->infix;
  408. infixRule(canAssign);///! 执行具体函数
  409. }
  410. if (canAssign && match(TOKEN_EQUAL)) {
  411. error("Invalid assignment target.");
  412. }
  413. }
  414. static uint8_t identifierConstant(Token *name) {
  415. return makeConstant(OBJ_VAL(copyString(name->start, name->length)));
  416. }
  417. static bool identifiersEqual(Token *a, Token *b) {
  418. if (a->length != b->length) return false;
  419. return memcmp(a->start, b->start, a->length) == 0;
  420. }
  421. static void addLocal(Token name) {
  422. if (current->localCount == UINT8_COUNT) {
  423. error("Too many local variables in function.");
  424. return;
  425. }
  426. Local *local = &current->locals[current->localCount++];
  427. local->name = name;
  428. local->depth = -1;// 未初始化
  429. }
  430. static void declareVariable() {
  431. if (current->scopeDepth == 0) return;
  432. Token *name = &parser.previous;
  433. // we detect the error like:
  434. // {
  435. // var a = "first";
  436. // var a = "second";
  437. // }
  438. for (int i = current->localCount - 1; i >= 0; i--) {
  439. Local *local = &current->locals[i];
  440. if (local->depth != -1 && local->depth < current->scopeDepth) {
  441. break;
  442. }
  443. if (identifiersEqual(name, &local->name)) {
  444. error("Already a variable with the name in this scope.");
  445. }
  446. }
  447. addLocal(*name);
  448. }
  449. static uint8_t parseVariable(const char *errorMessage) {
  450. consume(TOKEN_IDENTIFIER, errorMessage);
  451. declareVariable();// 处理本地变量
  452. if (current->scopeDepth > 0) return 0;
  453. return identifierConstant(&parser.previous);
  454. }
  455. static void defineVariable(uint8_t global) {
  456. if (current->scopeDepth > 0) {
  457. // markInitialized 未初始化时值 为 -1
  458. current->locals[current->localCount - 1].depth = current->scopeDepth;
  459. // 本地变量,直接退出
  460. return;
  461. }
  462. emitBytes(OP_DEFINE_GLOBAL, global);
  463. }
  464. static ParseRule *getRule(TokenType type) {
  465. return &rules[type];
  466. }
  467. /**
  468. ******************************************************************************
  469. * statement → exprStmt
  470. | forStmt
  471. | ifStmt
  472. | printStmt
  473. | returnStmt
  474. | whileStmt
  475. | block ;
  476. block → "{" declaration* "}" ;
  477. declaration → classDecl
  478. | funDecl
  479. | varDecl
  480. | statement ;
  481. ******************************************************************************
  482. */
  483. bool compile(const char *source, Chunk *chunk) {
  484. Compiler compiler;
  485. initScanner(source);
  486. initCompiler(&compiler);
  487. compilingChunk = chunk;
  488. parser.hadError = false;
  489. parser.panicMode = false;
  490. advance();
  491. while (!match(TOKEN_EOF)) {
  492. declaration();
  493. }
  494. endCompiler();//! return opcode
  495. return !parser.hadError;
  496. }