compiler.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577
  1. /**
  2. ******************************************************************************
  3. * @file : compiler.c
  4. * @author : simon
  5. * @brief : None
  6. * @attention : None
  7. * @date : 2023/8/17
  8. ******************************************************************************
  9. */
  10. #include "compiler.h"
  11. #include "common.h"
  12. #include "scanner.h"
  13. #ifdef DEBUG_PRINT_CODE
  14. #include "debug.h"
  15. #endif
  16. typedef struct {
  17. Token current;
  18. Token previous;
  19. bool hadError;
  20. bool panicMode;
  21. } Parser;
  22. typedef enum {
  23. PREC_NONE,
  24. PREC_ASSIGNMENT,// =
  25. PREC_OR, // or
  26. PREC_AND, // and
  27. PREC_EQUALITY, // == !=
  28. PREC_COMPARISON,// < > <= >=
  29. PREC_TERM, // + -
  30. PREC_FACTOR, // * /
  31. PREC_UNARY, // ! -
  32. PREC_CALL, // . ()
  33. PREC_PRIMARY
  34. } Precedence;
  35. typedef void (*ParseFn)(bool canAssign);
  36. typedef struct {
  37. ParseFn prefix;
  38. ParseFn infix;
  39. Precedence precedence;
  40. } ParseRule;
  41. typedef struct {
  42. Token name;
  43. int depth;
  44. } Local;
  45. typedef struct {
  46. Local locals[UINT8_COUNT];
  47. int localCount;/// how many locals are in scope
  48. int scopeDepth;
  49. } Compiler;
  50. Parser parser;
  51. Compiler *current = NULL;
  52. Chunk *compilingChunk;
  53. static void parsePrecedence(Precedence);
  54. static uint8_t parseVariable(const char *);
  55. static void defineVariable(uint8_t);
  56. static uint8_t identifierConstant(Token *name);
  57. static bool identifiersEqual(Token *a, Token *b);
  58. static ParseRule *getRule(TokenType);
  59. static void statement();
  60. static void declaration();
  61. static void beginScope();
  62. static void endScope();
  63. static Chunk *currentChunk() {
  64. return compilingChunk;
  65. }
  66. static void errorAt(Token *token, const char *message) {
  67. if (parser.panicMode) return;
  68. parser.panicMode = true;
  69. fprintf(stderr, "[line %d] Error", token->line);
  70. if (token->type == TOKEN_EOF) {
  71. fprintf(stderr, " at end");
  72. } else if (token->type == TOKEN_ERROR) {
  73. ///! Nothing.
  74. } else {
  75. fprintf(stderr, " at '%.*s'", token->length, token->start);
  76. }
  77. fprintf(stderr, ": %s\n", message);
  78. parser.hadError = true;
  79. }
  80. static void error(const char *message) {
  81. errorAt(&parser.previous, message);
  82. }
  83. static void errorAtCurrent(const char *message) {
  84. errorAt(&parser.current, message);
  85. }
  86. static void advance() {
  87. parser.previous = parser.current;
  88. for (;;) {
  89. parser.current = scanToken();
  90. if (parser.current.type != TOKEN_ERROR) break;
  91. errorAtCurrent(parser.current.start);
  92. }
  93. }
  94. void consume(TokenType type, const char *message) {
  95. if (parser.current.type == type) {
  96. advance();
  97. return;
  98. }
  99. errorAtCurrent(message);
  100. }
  101. static bool check(TokenType type) {
  102. return parser.current.type == type;
  103. }
  104. static bool match(TokenType type) {
  105. if (!check(type)) return false;
  106. advance();
  107. return true;
  108. }
  109. static void emitByte(uint8_t byte) {
  110. writeChunk(currentChunk(), byte, parser.previous.line);
  111. }
  112. static void emitBytes(uint8_t byte1, uint8_t byte2) {
  113. emitByte(byte1);
  114. emitByte(byte2);
  115. }
  116. static int emitJump(uint8_t instruction) {
  117. emitByte(instruction);
  118. emitByte(0xff);// 占位字节,后面填入需要跳转的 offset
  119. emitByte(0xff);//
  120. return currentChunk()->count - 2;
  121. }
  122. static void emitReturn() {
  123. emitByte(OP_RETURN);
  124. }
  125. static uint8_t makeConstant(Value value) {
  126. int constant = addConstant(currentChunk(), value);
  127. if (constant > UINT8_MAX) {
  128. error("Too many constants in one chunk.");
  129. return 0;
  130. }
  131. return (uint8_t) constant;
  132. }
  133. static void expression() {
  134. parsePrecedence(PREC_ASSIGNMENT);
  135. }
  136. /// for example: var a; var b = 10;
  137. static void varDeclaration() {
  138. uint8_t global = parseVariable("Expect variable name.");
  139. // 变量初始化
  140. if (match(TOKEN_EQUAL)) {
  141. expression();
  142. } else {
  143. emitByte(OP_NIL);
  144. }
  145. consume(TOKEN_SEMICOLON, "Expect ';' after variable declaration.");
  146. defineVariable(global);
  147. }
  148. static void expressionStatement() {
  149. expression();
  150. consume(TOKEN_SEMICOLON, "Expect ';' after expression.");
  151. emitByte(OP_POP);
  152. }
  153. static void patchJump(int offset) {
  154. // -2 to adjust for the bytecode for the jump offset itself.
  155. int jump = currentChunk()->count - offset - 2;
  156. if (jump > UINT16_MAX) error("Too much code to jump over.");
  157. currentChunk()->code[offset] = (jump >> 8) & 0xFF;
  158. currentChunk()->code[offset + 1] = jump & 0xFF;
  159. }
  160. static void ifStatement() {
  161. consume(TOKEN_LEFT_PAREN, "Expect '(' after 'if'.");
  162. expression();
  163. consume(TOKEN_RIGHT_PAREN, "Expect ')' after condition.");
  164. int thenJump = emitJump(OP_JUMP_IF_FALSE);
  165. emitByte(OP_POP);
  166. statement();// then branch statement
  167. int elseJump = emitJump(OP_JUMP);
  168. patchJump(thenJump);
  169. emitByte(OP_POP);
  170. if (match(TOKEN_ELSE)) statement();// else branch statement
  171. patchJump(elseJump);
  172. }
  173. static void printStatement() {
  174. expression();
  175. consume(TOKEN_SEMICOLON, "Expect ';' after value.");
  176. emitByte(OP_PRINT);
  177. }
  178. static void synchronize() {
  179. parser.panicMode = false;
  180. while (parser.current.type != TOKEN_EOF) {
  181. if (parser.previous.type == TOKEN_SEMICOLON) return;
  182. switch (parser.current.type) {
  183. case TOKEN_CLASS:
  184. case TOKEN_FUN:
  185. case TOKEN_VAR:
  186. case TOKEN_FOR:
  187. case TOKEN_IF:
  188. case TOKEN_WHILE:
  189. case TOKEN_PRINT:
  190. case TOKEN_RETURN:
  191. return;
  192. default:;// Do nothing.
  193. }
  194. advance();
  195. }
  196. }
  197. ///declaration → classDecl
  198. /// | funDecl
  199. /// | varDecl
  200. /// | statement ;
  201. static void declaration() {
  202. if (match(TOKEN_VAR)) {
  203. varDeclaration();
  204. } else {
  205. statement();
  206. }
  207. if (parser.panicMode) synchronize();
  208. }
  209. static void block() {
  210. while (!check(TOKEN_RIGHT_BRACE) && !check(TOKEN_EOF)) {
  211. declaration();
  212. }
  213. consume(TOKEN_RIGHT_BRACE, "Expect '}' after block.");
  214. }
  215. ///statement → exprStmt
  216. /// | forStmt
  217. /// | ifStmt
  218. /// | printStmt
  219. /// | returnStmt
  220. /// | whileStmt
  221. /// | block ;
  222. static void statement() {
  223. if (match(TOKEN_PRINT)) {
  224. printStatement();
  225. } else if (match(TOKEN_IF)) {
  226. ifStatement();
  227. } else if (match(TOKEN_LEFT_BRACE)) {
  228. ///! block → "{" declaration* "}" ;
  229. beginScope();
  230. block();
  231. endScope();
  232. } else {
  233. expressionStatement();
  234. }
  235. }
  236. static void beginScope() {
  237. current->scopeDepth++;
  238. }
  239. static void endScope() {
  240. current->scopeDepth--;
  241. while (current->localCount > 0
  242. && current->locals[current->localCount - 1].depth > current->scopeDepth) {
  243. emitByte(OP_POP);
  244. current->localCount--;
  245. }
  246. }
  247. static void emitConstant(Value value) {
  248. emitBytes(OP_CONSTANT, makeConstant(value));
  249. }
  250. static void initCompiler(Compiler *compiler) {
  251. compiler->localCount = 0;
  252. compiler->scopeDepth = 0;
  253. current = compiler;
  254. }
  255. static void endCompiler() {
  256. emitReturn();
  257. #ifdef DEBUG_PRINT_CODE
  258. if (!parser.hadError) {
  259. disassembleChunk(currentChunk(), "code");
  260. }
  261. #endif
  262. }
  263. static void grouping(__attribute__((unused)) bool canAssign) {
  264. expression();
  265. consume(TOKEN_RIGHT_PAREN, "Expect ')' after expression.");
  266. }
  267. /// \brief parse number token
  268. /// Number literals: 123
  269. static void number(__attribute__((unused)) bool canAssign) {
  270. double value = strtod(parser.previous.start, NULL);
  271. emitConstant(NUMBER_VAL(value));
  272. }
  273. static void string(__attribute__((unused)) bool canAssign) {
  274. emitConstant(OBJ_VAL(copyString(parser.previous.start + 1,
  275. parser.previous.length - 2)));
  276. }
  277. static int resolveLocal(Compiler *compile, Token *name) {
  278. for (int i = compile->localCount - 1; i >= 0; i--) {
  279. Local *local = &compile->locals[i];
  280. if (identifiersEqual(name, &local->name)) {
  281. if (local->depth == -1) {
  282. error("Can't read local variable in ints own initializer.");
  283. }
  284. return i;
  285. }
  286. }
  287. return -1;
  288. }
  289. static void namedVariable(Token name, bool canAssign) {
  290. uint8_t getOp, setOp;
  291. int arg = resolveLocal(current, &name);
  292. if (arg != -1) {
  293. getOp = OP_GET_LOCAL;
  294. setOp = OP_SET_LOCAL;
  295. } else {
  296. arg = identifierConstant(&name);
  297. getOp = OP_GET_GLOBAL;
  298. setOp = OP_SET_GLOBAL;
  299. }
  300. if (canAssign && match(TOKEN_EQUAL)) {
  301. // 如 menu.brunch(sunday).beverage = "mimosa";
  302. expression();
  303. emitBytes(setOp, (uint8_t) arg);
  304. } else {
  305. emitBytes(getOp, (uint8_t) arg);
  306. }
  307. }
  308. static void variable(bool canAssign) {
  309. namedVariable(parser.previous, canAssign);
  310. }
  311. /// Unary negation: -123
  312. static void unary(__attribute__((unused)) bool canAssign) {
  313. TokenType operatorType = parser.previous.type;
  314. // Compile the operand.
  315. parsePrecedence(PREC_UNARY);
  316. // Emit the operator instruction.
  317. switch (operatorType) {
  318. case TOKEN_BANG:
  319. emitByte(OP_NOT);
  320. break;
  321. case TOKEN_MINUS:
  322. emitByte(OP_NEGATE);
  323. break;
  324. default: return;// Unreachable.
  325. }
  326. }
  327. /// \brief infix parser
  328. static void binary(__attribute__((unused)) bool canAssign) {
  329. TokenType operatorType = parser.previous.type;
  330. ParseRule *rule = getRule(operatorType);
  331. parsePrecedence((Precedence) rule->precedence + 1);
  332. switch (operatorType) {
  333. case TOKEN_BANG_EQUAL:
  334. emitBytes(OP_EQUAL, OP_NOT);
  335. break;
  336. case TOKEN_EQUAL_EQUAL:
  337. emitByte(OP_EQUAL);
  338. break;
  339. case TOKEN_GREATER:
  340. emitByte(OP_GREATER);
  341. break;
  342. case TOKEN_GREATER_EQUAL:
  343. emitBytes(OP_LESS, OP_NOT);
  344. break;
  345. case TOKEN_LESS:
  346. emitByte(OP_LESS);
  347. break;
  348. case TOKEN_LESS_EQUAL:
  349. emitBytes(OP_GREATER, OP_NOT);
  350. break;
  351. case TOKEN_PLUS:
  352. emitByte(OP_ADD);
  353. break;
  354. case TOKEN_MINUS:
  355. emitByte(OP_SUBTRACT);
  356. break;
  357. case TOKEN_STAR:
  358. emitByte(OP_MULTIPLY);
  359. break;
  360. case TOKEN_SLASH:
  361. emitByte(OP_DIVIDE);
  362. break;
  363. default: return;// Unreachable.
  364. }
  365. }
  366. static void literal(__attribute__((unused)) bool canAssign) {
  367. switch (parser.previous.type) {
  368. case TOKEN_FALSE:
  369. emitByte(OP_FALSE);
  370. break;
  371. case TOKEN_NIL:
  372. emitByte(OP_NIL);
  373. break;
  374. case TOKEN_TRUE:
  375. emitByte(OP_TRUE);
  376. break;
  377. default: return;// Unreachable
  378. }
  379. }
  380. ParseRule rules[] = {
  381. [TOKEN_LEFT_PAREN] = {grouping, NULL, PREC_NONE},
  382. [TOKEN_RIGHT_PAREN] = {NULL, NULL, PREC_NONE},
  383. [TOKEN_LEFT_BRACE] = {NULL, NULL, PREC_NONE},
  384. [TOKEN_RIGHT_BRACE] = {NULL, NULL, PREC_NONE},
  385. [TOKEN_COMMA] = {NULL, NULL, PREC_NONE},
  386. [TOKEN_DOT] = {NULL, NULL, PREC_NONE},
  387. [TOKEN_MINUS] = {unary, binary, PREC_TERM},
  388. [TOKEN_PLUS] = {NULL, binary, PREC_TERM},
  389. [TOKEN_SEMICOLON] = {NULL, NULL, PREC_NONE},
  390. [TOKEN_SLASH] = {NULL, binary, PREC_FACTOR},
  391. [TOKEN_STAR] = {NULL, binary, PREC_FACTOR},
  392. [TOKEN_BANG] = {unary, NULL, PREC_NONE},
  393. [TOKEN_BANG_EQUAL] = {NULL, binary, PREC_EQUALITY},
  394. [TOKEN_EQUAL] = {NULL, NULL, PREC_NONE},
  395. [TOKEN_EQUAL_EQUAL] = {NULL, binary, PREC_EQUALITY},
  396. [TOKEN_GREATER] = {NULL, binary, PREC_COMPARISON},
  397. [TOKEN_GREATER_EQUAL] = {NULL, binary, PREC_COMPARISON},
  398. [TOKEN_LESS] = {NULL, binary, PREC_COMPARISON},
  399. [TOKEN_LESS_EQUAL] = {NULL, binary, PREC_COMPARISON},
  400. [TOKEN_IDENTIFIER] = {variable, NULL, PREC_NONE},
  401. [TOKEN_STRING] = {string, NULL, PREC_NONE},
  402. [TOKEN_NUMBER] = {number, NULL, PREC_NONE},
  403. [TOKEN_AND] = {NULL, NULL, PREC_NONE},
  404. [TOKEN_CLASS] = {NULL, NULL, PREC_NONE},
  405. [TOKEN_ELSE] = {NULL, NULL, PREC_NONE},
  406. [TOKEN_FALSE] = {literal, NULL, PREC_NONE},
  407. [TOKEN_FOR] = {NULL, NULL, PREC_NONE},
  408. [TOKEN_FUN] = {NULL, NULL, PREC_NONE},
  409. [TOKEN_IF] = {NULL, NULL, PREC_NONE},
  410. [TOKEN_NIL] = {literal, NULL, PREC_NONE},
  411. [TOKEN_OR] = {NULL, NULL, PREC_NONE},
  412. [TOKEN_PRINT] = {NULL, NULL, PREC_NONE},
  413. [TOKEN_RETURN] = {NULL, NULL, PREC_NONE},
  414. [TOKEN_SUPER] = {NULL, NULL, PREC_NONE},
  415. [TOKEN_THIS] = {NULL, NULL, PREC_NONE},
  416. [TOKEN_TRUE] = {literal, NULL, PREC_NONE},
  417. [TOKEN_VAR] = {NULL, NULL, PREC_NONE},
  418. [TOKEN_WHILE] = {NULL, NULL, PREC_NONE},
  419. [TOKEN_ERROR] = {NULL, NULL, PREC_NONE},
  420. [TOKEN_EOF] = {NULL, NULL, PREC_NONE},
  421. };
  422. /// \brief 优先级处理
  423. /// \param precedence
  424. static void parsePrecedence(Precedence precedence) {
  425. advance();
  426. ParseFn prefixRule = getRule(parser.previous.type)->prefix;
  427. if (prefixRule == NULL) {
  428. error("Expect expression.");
  429. return;
  430. }
  431. bool canAssign = precedence <= PREC_ASSIGNMENT;
  432. prefixRule(canAssign);///! 执行具体函数
  433. while (precedence <= getRule(parser.current.type)->precedence) {
  434. advance();
  435. ParseFn infixRule = getRule(parser.previous.type)->infix;
  436. infixRule(canAssign);///! 执行具体函数
  437. }
  438. if (canAssign && match(TOKEN_EQUAL)) {
  439. error("Invalid assignment target.");
  440. }
  441. }
  442. static uint8_t identifierConstant(Token *name) {
  443. return makeConstant(OBJ_VAL(copyString(name->start, name->length)));
  444. }
  445. static bool identifiersEqual(Token *a, Token *b) {
  446. if (a->length != b->length) return false;
  447. return memcmp(a->start, b->start, a->length) == 0;
  448. }
  449. static void addLocal(Token name) {
  450. if (current->localCount == UINT8_COUNT) {
  451. error("Too many local variables in function.");
  452. return;
  453. }
  454. Local *local = &current->locals[current->localCount++];
  455. local->name = name;
  456. local->depth = -1;// 未初始化
  457. }
  458. static void declareVariable() {
  459. if (current->scopeDepth == 0) return;
  460. Token *name = &parser.previous;
  461. // we detect the error like:
  462. // {
  463. // var a = "first";
  464. // var a = "second";
  465. // }
  466. for (int i = current->localCount - 1; i >= 0; i--) {
  467. Local *local = &current->locals[i];
  468. if (local->depth != -1 && local->depth < current->scopeDepth) {
  469. break;
  470. }
  471. if (identifiersEqual(name, &local->name)) {
  472. error("Already a variable with the name in this scope.");
  473. }
  474. }
  475. addLocal(*name);
  476. }
  477. static uint8_t parseVariable(const char *errorMessage) {
  478. consume(TOKEN_IDENTIFIER, errorMessage);
  479. declareVariable();// 处理本地变量
  480. if (current->scopeDepth > 0) return 0;
  481. return identifierConstant(&parser.previous);
  482. }
  483. static void defineVariable(uint8_t global) {
  484. if (current->scopeDepth > 0) {
  485. // markInitialized 未初始化时值 为 -1
  486. current->locals[current->localCount - 1].depth = current->scopeDepth;
  487. // 本地变量,直接退出
  488. return;
  489. }
  490. emitBytes(OP_DEFINE_GLOBAL, global);
  491. }
  492. static ParseRule *getRule(TokenType type) {
  493. return &rules[type];
  494. }
  495. /**
  496. ******************************************************************************
  497. * statement → exprStmt
  498. | forStmt
  499. | ifStmt
  500. | printStmt
  501. | returnStmt
  502. | whileStmt
  503. | block ;
  504. block → "{" declaration* "}" ;
  505. declaration → classDecl
  506. | funDecl
  507. | varDecl
  508. | statement ;
  509. ******************************************************************************
  510. */
  511. bool compile(const char *source, Chunk *chunk) {
  512. Compiler compiler;
  513. initScanner(source);
  514. initCompiler(&compiler);
  515. compilingChunk = chunk;
  516. parser.hadError = false;
  517. parser.panicMode = false;
  518. advance();
  519. while (!match(TOKEN_EOF)) {
  520. declaration();
  521. }
  522. endCompiler();//! return opcode
  523. return !parser.hadError;
  524. }