|
|
@@ -0,0 +1,197 @@
|
|
|
+/**
|
|
|
+ ******************************************************************************
|
|
|
+ * @file : scanner.c
|
|
|
+ * @author : simon
|
|
|
+ * @brief : None
|
|
|
+ * @attention : None
|
|
|
+ * @date : 2023/8/17
|
|
|
+ ******************************************************************************
|
|
|
+ */
|
|
|
+#include <stdbool.h>
|
|
|
+#include "scanner.h"
|
|
|
+#include "common.h"
|
|
|
+
|
|
|
/**
 * @brief Cursor state of the lexer over a NUL-terminated source string.
 */
typedef struct {
    /** First character of the lexeme that is currently being scanned. */
    const char *start;
    /** Character the scanner is looking at right now (next to be consumed). */
    const char *current;
    /** Line counter; set to 1 by initScanner() and bumped on each '\n' consumed. */
    int line;
} Scanner;

/** The single scanner instance that every routine in this file operates on. */
Scanner scanner;
|
|
|
+
|
|
|
+void initScanner(const char *source) {
|
|
|
+ scanner.start = source;
|
|
|
+ scanner.current = source;
|
|
|
+ scanner.line = 1;
|
|
|
+}
|
|
|
/**
 * @brief Tests whether a character is an ASCII letter or an underscore.
 *
 * @param c  Character to classify.
 * @return true for 'a'..'z', 'A'..'Z' and '_'; false otherwise.
 */
static bool isAlpha(char c) {
    if (c == '_') return true;
    if ('a' <= c && c <= 'z') return true;
    return 'A' <= c && c <= 'Z';
}
|
|
|
/**
 * @brief Tests whether a character is an ASCII decimal digit.
 *
 * @param c  Character to classify.
 * @return true for '0'..'9'; false otherwise.
 */
static bool isDigit(char c) {
    return !(c < '0' || c > '9');
}
|
|
|
+static bool isAtEnd() {
|
|
|
+ return *scanner.current == '\0';
|
|
|
+}
|
|
|
+static char advance() {
|
|
|
+ scanner.current++;
|
|
|
+ return scanner.current[-1];
|
|
|
+}
|
|
|
+static char peek() {
|
|
|
+ return *scanner.current;
|
|
|
+}
|
|
|
+static char peekNext() {
|
|
|
+ if (isAtEnd()) return '\0';
|
|
|
+ return scanner.current[1];
|
|
|
+}
|
|
|
+static bool match(int expected) {
|
|
|
+ if (isAtEnd()) return false;
|
|
|
+ if (*scanner.current != expected) return false;
|
|
|
+ scanner.current++;
|
|
|
+ return true;
|
|
|
+}
|
|
|
+static Token makeToken(TokenType type) {
|
|
|
+ Token token;
|
|
|
+ token.type = type;
|
|
|
+ token.start = scanner.start;
|
|
|
+ token.length = (int) (scanner.current - scanner.start);
|
|
|
+ token.length = scanner.line;
|
|
|
+ return token;
|
|
|
+}
|
|
|
+static Token errorToken(const char *message) {
|
|
|
+ Token token;
|
|
|
+ token.type = TOKEN_ERROR;
|
|
|
+ token.start = message;
|
|
|
+ token.length = (int) strlen(message);
|
|
|
+ token.length = scanner.line;
|
|
|
+ return token;
|
|
|
+}
|
|
|
+
|
|
|
+static void skipWhitespace() {
|
|
|
+ for (;;) {
|
|
|
+ char c = peek();
|
|
|
+ switch (c) {
|
|
|
+ case ' ':
|
|
|
+ case '\r':
|
|
|
+ case '\t':advance();
|
|
|
+ break;
|
|
|
+ case '\n':scanner.line++;
|
|
|
+ advance();
|
|
|
+ break;
|
|
|
+ case '/':
|
|
|
+ if (peekNext() == '/') {
|
|
|
+ // A comment goes until the end of the line.
|
|
|
+ while (peek() != '\n' && !isAtEnd()) advance();
|
|
|
+ } else {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ default:return;
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+static TokenType checkKeyword(int length, const char *keyStr,
|
|
|
+ TokenType type) {
|
|
|
+ if (scanner.current - scanner.start == length &&
|
|
|
+ memcmp(scanner.start, keyStr, length) == 0) {
|
|
|
+ return type;
|
|
|
+ }
|
|
|
+
|
|
|
+ return TOKEN_IDENTIFIER;
|
|
|
+}
|
|
|
+/// \brief keywords or TOKEN_IDENTIFIER
|
|
|
+/// \return TokenType
|
|
|
+static TokenType identifierType() {
|
|
|
+ switch (scanner.start[0]) {
|
|
|
+ case 'a': return checkKeyword(3, "and", TOKEN_AND);
|
|
|
+ case 'c': return checkKeyword(5, "class", TOKEN_CLASS);
|
|
|
+ case 'e': return checkKeyword(4, "else", TOKEN_ELSE);
|
|
|
+ case 'f':
|
|
|
+ if (scanner.current - scanner.start > 1) {
|
|
|
+ switch (scanner.start[1]) {
|
|
|
+ case 'a': return checkKeyword(5, "false", TOKEN_FALSE);
|
|
|
+ case 'o': return checkKeyword(3, "for", TOKEN_FOR);
|
|
|
+ case 'u': return checkKeyword(3, "fun", TOKEN_FUN);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case 'i': return checkKeyword(2, "if", TOKEN_IF);
|
|
|
+ case 'n': return checkKeyword(3, "nil", TOKEN_NIL);
|
|
|
+ case 'o': return checkKeyword(2, "or", TOKEN_OR);
|
|
|
+ case 'p': return checkKeyword(5, "print", TOKEN_PRINT);
|
|
|
+ case 'r': return checkKeyword(6, "return", TOKEN_RETURN);
|
|
|
+ case 's': return checkKeyword(5, "super", TOKEN_SUPER);
|
|
|
+ case 't':
|
|
|
+ if (scanner.current - scanner.start > 1) {
|
|
|
+ switch (scanner.start[1]) {
|
|
|
+ case 'h': return checkKeyword(4, "this", TOKEN_THIS);
|
|
|
+ case 'r': return checkKeyword(4, "true", TOKEN_TRUE);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case 'v': return checkKeyword(3, "var", TOKEN_VAR);
|
|
|
+ case 'w': return checkKeyword(5, "while", TOKEN_WHILE);
|
|
|
+ }
|
|
|
+ return TOKEN_IDENTIFIER;
|
|
|
+}
|
|
|
+static Token identifier() {
|
|
|
+ while (isAlpha(peek()) || isDigit(peek())) advance();
|
|
|
+ return makeToken(identifierType());
|
|
|
+}
|
|
|
+static Token number() {
|
|
|
+ while (isDigit(peek())) advance();
|
|
|
+
|
|
|
+ // 小数部分
|
|
|
+ if (peek() == '.' && isDigit(peekNext())) {
|
|
|
+ // Consume the "."
|
|
|
+ advance();
|
|
|
+
|
|
|
+ while (isDigit(peek())) advance();
|
|
|
+ }
|
|
|
+
|
|
|
+ return makeToken(TOKEN_NUMBER);
|
|
|
+}
|
|
|
+static Token string() {
|
|
|
+ while (peek() != '"' && !isAtEnd()) {
|
|
|
+ if (peek() == '\n') scanner.line++;
|
|
|
+ advance();
|
|
|
+ }
|
|
|
+ if (isAtEnd()) return errorToken("unterminated string.");
|
|
|
+
|
|
|
+ // The closing quote
|
|
|
+ advance();
|
|
|
+ return makeToken(TOKEN_STRING);
|
|
|
+}
|
|
|
+Token scanToken() {
|
|
|
+ skipWhitespace();
|
|
|
+ scanner.start = scanner.current;
|
|
|
+
|
|
|
+ if (isAtEnd()) return makeToken(TOKEN_EOF);
|
|
|
+
|
|
|
+ char c = advance();
|
|
|
+ if (isAlpha(c)) return identifier();
|
|
|
+ if (isDigit(c)) return number();
|
|
|
+
|
|
|
+ switch (c) {
|
|
|
+ case '(': return makeToken(TOKEN_LEFT_PAREN);
|
|
|
+ case ')': return makeToken(TOKEN_RIGHT_PAREN);
|
|
|
+ case '{': return makeToken(TOKEN_LEFT_BRACE);
|
|
|
+ case '}': return makeToken(TOKEN_RIGHT_BRACE);
|
|
|
+ case ';': return makeToken(TOKEN_SEMICOLON);
|
|
|
+ case ',': return makeToken(TOKEN_COMMA);
|
|
|
+ case '.': return makeToken(TOKEN_DOT);
|
|
|
+ case '-': return makeToken(TOKEN_MINUS);
|
|
|
+ case '+': return makeToken(TOKEN_PLUS);
|
|
|
+ case '/': return makeToken(TOKEN_SLASH);
|
|
|
+ case '*': return makeToken(TOKEN_STAR);
|
|
|
+ case '!': return makeToken(match('=') ? TOKEN_BANG_EQUAL : TOKEN_BANG);
|
|
|
+ case '=': return makeToken(match('=') ? TOKEN_EQUAL_EQUAL : TOKEN_EQUAL);
|
|
|
+ case '<': return makeToken(match('=') ? TOKEN_LESS_EQUAL : TOKEN_LESS);
|
|
|
+ case '>': return makeToken(match('=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER);
|
|
|
+ case '"': return string();
|
|
|
+ }
|
|
|
+
|
|
|
+ return errorToken("Unexpected character.");
|
|
|
+}
|