scanner.c 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
/**
 ******************************************************************************
 * @file      : scanner.c
 * @author    : simon
 * @brief     : On-demand lexical scanner that splits source text into tokens
 * @attention : None
 * @date      : 2023/8/17
 ******************************************************************************
 */
  10. #include <stdbool.h>
  11. #include "scanner.h"
  12. #include "common.h"
/// \brief Scanner state: a pair of cursors over the NUL-terminated source text
///        plus the line number used to tag emitted tokens.
typedef struct {
    const char *start; // first character of the lexeme currently being scanned
    const char *current; // next character to be consumed (one past the last consumed one)
    int line; // line of the current position; starts at 1 and is bumped on every '\n'
} Scanner;
// The single scanner instance shared by every function in this file.
Scanner scanner;
  19. void initScanner(const char *source) {
  20. scanner.start = source;
  21. scanner.current = source;
  22. scanner.line = 1;
  23. }
  24. static bool isAlpha(char c) {
  25. return (c >= 'a' && c <= 'z') ||
  26. (c >= 'A' && c <= 'Z') ||
  27. c == '_';
  28. }
  29. static bool isDigit(char c) {
  30. return c >= '0' && c <= '9';
  31. }
  32. static bool isAtEnd() {
  33. return *scanner.current == '\0';
  34. }
  35. static char advance() {
  36. scanner.current++;
  37. return scanner.current[-1];
  38. }
  39. static char peek() {
  40. return *scanner.current;
  41. }
  42. static char peekNext() {
  43. if (isAtEnd()) return '\0';
  44. return scanner.current[1];
  45. }
  46. static bool match(int expected) {
  47. if (isAtEnd()) return false;
  48. if (*scanner.current != expected) return false;
  49. scanner.current++;
  50. return true;
  51. }
  52. static Token makeToken(TokenType type) {
  53. Token token;
  54. token.type = type;
  55. token.start = scanner.start;
  56. token.length = (int) (scanner.current - scanner.start);
  57. token.line = scanner.line;
  58. return token;
  59. }
  60. static Token errorToken(const char *message) {
  61. Token token;
  62. token.type = TOKEN_ERROR;
  63. token.start = message;
  64. token.length = (int) strlen(message);
  65. token.line = scanner.line;
  66. return token;
  67. }
  68. static void skipWhitespace() {
  69. for (;;) {
  70. char c = peek();
  71. switch (c) {
  72. case ' ':
  73. case '\r':
  74. case '\t':advance();
  75. break;
  76. case '\n':scanner.line++;
  77. advance();
  78. break;
  79. case '/':
  80. if (peekNext() == '/') {
  81. // A comment goes until the end of the line.
  82. while (peek() != '\n' && !isAtEnd()) advance();
  83. } else {
  84. return;
  85. }
  86. break;
  87. default:return;
  88. }
  89. }
  90. }
  91. static TokenType checkKeyword(int length, const char *keyStr,
  92. TokenType type) {
  93. if (scanner.current - scanner.start == length &&
  94. memcmp(scanner.start, keyStr, length) == 0) {
  95. return type;
  96. }
  97. return TOKEN_IDENTIFIER;
  98. }
  99. /// \brief keywords or TOKEN_IDENTIFIER
  100. /// \return TokenType
  101. static TokenType identifierType() {
  102. switch (scanner.start[0]) {
  103. case 'a': return checkKeyword(3, "and", TOKEN_AND);
  104. case 'c': return checkKeyword(5, "class", TOKEN_CLASS);
  105. case 'e': return checkKeyword(4, "else", TOKEN_ELSE);
  106. case 'f':
  107. if (scanner.current - scanner.start > 1) {
  108. switch (scanner.start[1]) {
  109. case 'a': return checkKeyword(5, "false", TOKEN_FALSE);
  110. case 'o': return checkKeyword(3, "for", TOKEN_FOR);
  111. case 'u': return checkKeyword(3, "fun", TOKEN_FUN);
  112. }
  113. }
  114. break;
  115. case 'i': return checkKeyword(2, "if", TOKEN_IF);
  116. case 'n': return checkKeyword(3, "nil", TOKEN_NIL);
  117. case 'o': return checkKeyword(2, "or", TOKEN_OR);
  118. case 'p': return checkKeyword(5, "print", TOKEN_PRINT);
  119. case 'r': return checkKeyword(6, "return", TOKEN_RETURN);
  120. case 's': return checkKeyword(5, "super", TOKEN_SUPER);
  121. case 't':
  122. if (scanner.current - scanner.start > 1) {
  123. switch (scanner.start[1]) {
  124. case 'h': return checkKeyword(4, "this", TOKEN_THIS);
  125. case 'r': return checkKeyword(4, "true", TOKEN_TRUE);
  126. }
  127. }
  128. break;
  129. case 'v': return checkKeyword(3, "var", TOKEN_VAR);
  130. case 'w': return checkKeyword(5, "while", TOKEN_WHILE);
  131. }
  132. return TOKEN_IDENTIFIER;
  133. }
  134. static Token identifier() {
  135. while (isAlpha(peek()) || isDigit(peek())) advance();
  136. return makeToken(identifierType());
  137. }
  138. static Token number() {
  139. while (isDigit(peek())) advance();
  140. // 小数部分
  141. if (peek() == '.' && isDigit(peekNext())) {
  142. // Consume the "."
  143. advance();
  144. while (isDigit(peek())) advance();
  145. }
  146. return makeToken(TOKEN_NUMBER);
  147. }
  148. static Token string() {
  149. while (peek() != '"' && !isAtEnd()) {
  150. if (peek() == '\n') scanner.line++;
  151. advance();
  152. }
  153. if (isAtEnd()) return errorToken("unterminated string.");
  154. // The closing quote
  155. advance();
  156. return makeToken(TOKEN_STRING);
  157. }
  158. Token scanToken() {
  159. skipWhitespace();
  160. scanner.start = scanner.current;
  161. if (isAtEnd()) return makeToken(TOKEN_EOF);
  162. char c = advance();
  163. if (isAlpha(c)) return identifier();
  164. if (isDigit(c)) return number();
  165. switch (c) {
  166. case '(': return makeToken(TOKEN_LEFT_PAREN);
  167. case ')': return makeToken(TOKEN_RIGHT_PAREN);
  168. case '{': return makeToken(TOKEN_LEFT_BRACE);
  169. case '}': return makeToken(TOKEN_RIGHT_BRACE);
  170. case ';': return makeToken(TOKEN_SEMICOLON);
  171. case ',': return makeToken(TOKEN_COMMA);
  172. case '.': return makeToken(TOKEN_DOT);
  173. case '-': return makeToken(TOKEN_MINUS);
  174. case '+': return makeToken(TOKEN_PLUS);
  175. case '/': return makeToken(TOKEN_SLASH);
  176. case '*': return makeToken(TOKEN_STAR);
  177. case '!': return makeToken(match('=') ? TOKEN_BANG_EQUAL : TOKEN_BANG);
  178. case '=': return makeToken(match('=') ? TOKEN_EQUAL_EQUAL : TOKEN_EQUAL);
  179. case '<': return makeToken(match('=') ? TOKEN_LESS_EQUAL : TOKEN_LESS);
  180. case '>': return makeToken(match('=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER);
  181. case '"': return string();
  182. default: return errorToken("Unexpected character.");
  183. }
  184. }