| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198 |
- /**
- ******************************************************************************
- * @file : scanner.c
- * @author : simon
- * @brief : None
- * @attention : None
- * @date : 2023/8/17
- ******************************************************************************
- */
- #include <stdbool.h>
- #include "scanner.h"
- #include "common.h"
/// \brief Scanner state: a window over the source text plus a line counter.
typedef struct Scanner {
    const char *start;   // first character of the lexeme currently being scanned
    const char *current; // character currently being looked at
    int line;            // line counter; bumped each time a '\n' is consumed
} Scanner;

// File-level scanner instance read and updated by the scanning functions.
Scanner scanner;
- void initScanner(const char *source) {
- scanner.start = source;
- scanner.current = source;
- scanner.line = 1;
- }
/// \brief Tell whether a character may appear in an identifier as a letter.
/// \param c character to classify
/// \return true for 'a'..'z', 'A'..'Z' and '_'; false otherwise
static bool isAlpha(char c) {
    if (c == '_') return true;
    if (c >= 'a' && c <= 'z') return true;
    return c >= 'A' && c <= 'Z';
}
/// \brief Tell whether a character is a decimal digit.
/// \param c character to classify
/// \return true for '0'..'9'; false otherwise
static bool isDigit(char c) {
    if (c < '0') return false;
    return c <= '9';
}
- static bool isAtEnd() {
- return *scanner.current == '\0';
- }
- static char advance() {
- scanner.current++;
- return scanner.current[-1];
- }
- static char peek() {
- return *scanner.current;
- }
- static char peekNext() {
- if (isAtEnd()) return '\0';
- return scanner.current[1];
- }
- static bool match(int expected) {
- if (isAtEnd()) return false;
- if (*scanner.current != expected) return false;
- scanner.current++;
- return true;
- }
- static Token makeToken(TokenType type) {
- Token token;
- token.type = type;
- token.start = scanner.start;
- token.length = (int) (scanner.current - scanner.start);
- token.length = scanner.line;
- return token;
- }
- static Token errorToken(const char *message) {
- Token token;
- token.type = TOKEN_ERROR;
- token.start = message;
- token.length = (int) strlen(message);
- token.length = scanner.line;
- return token;
- }
- static void skipWhitespace() {
- for (;;) {
- char c = peek();
- switch (c) {
- case ' ':
- case '\r':
- case '\t':advance();
- break;
- case '\n':scanner.line++;
- advance();
- break;
- case '/':
- if (peekNext() == '/') {
- // A comment goes until the end of the line.
- while (peek() != '\n' && !isAtEnd()) advance();
- } else {
- return;
- }
- break;
- default:return;
- }
- }
- }
- static TokenType checkKeyword(int length, const char *keyStr,
- TokenType type) {
- if (scanner.current - scanner.start == length &&
- memcmp(scanner.start, keyStr, length) == 0) {
- return type;
- }
- return TOKEN_IDENTIFIER;
- }
- /// \brief keywords or TOKEN_IDENTIFIER
- /// \return TokenType
- static TokenType identifierType() {
- switch (scanner.start[0]) {
- case 'a': return checkKeyword(3, "and", TOKEN_AND);
- case 'c': return checkKeyword(5, "class", TOKEN_CLASS);
- case 'e': return checkKeyword(4, "else", TOKEN_ELSE);
- case 'f':
- if (scanner.current - scanner.start > 1) {
- switch (scanner.start[1]) {
- case 'a': return checkKeyword(5, "false", TOKEN_FALSE);
- case 'o': return checkKeyword(3, "for", TOKEN_FOR);
- case 'u': return checkKeyword(3, "fun", TOKEN_FUN);
- }
- }
- break;
- case 'i': return checkKeyword(2, "if", TOKEN_IF);
- case 'n': return checkKeyword(3, "nil", TOKEN_NIL);
- case 'o': return checkKeyword(2, "or", TOKEN_OR);
- case 'p': return checkKeyword(5, "print", TOKEN_PRINT);
- case 'r': return checkKeyword(6, "return", TOKEN_RETURN);
- case 's': return checkKeyword(5, "super", TOKEN_SUPER);
- case 't':
- if (scanner.current - scanner.start > 1) {
- switch (scanner.start[1]) {
- case 'h': return checkKeyword(4, "this", TOKEN_THIS);
- case 'r': return checkKeyword(4, "true", TOKEN_TRUE);
- }
- }
- break;
- case 'v': return checkKeyword(3, "var", TOKEN_VAR);
- case 'w': return checkKeyword(5, "while", TOKEN_WHILE);
- }
- return TOKEN_IDENTIFIER;
- }
- static Token identifier() {
- while (isAlpha(peek()) || isDigit(peek())) advance();
- return makeToken(identifierType());
- }
- static Token number() {
- while (isDigit(peek())) advance();
- // 小数部分
- if (peek() == '.' && isDigit(peekNext())) {
- // Consume the "."
- advance();
- while (isDigit(peek())) advance();
- }
- return makeToken(TOKEN_NUMBER);
- }
- static Token string() {
- while (peek() != '"' && !isAtEnd()) {
- if (peek() == '\n') scanner.line++;
- advance();
- }
- if (isAtEnd()) return errorToken("unterminated string.");
- // The closing quote
- advance();
- return makeToken(TOKEN_STRING);
- }
- Token scanToken() {
- skipWhitespace();
- scanner.start = scanner.current;
- if (isAtEnd()) return makeToken(TOKEN_EOF);
- char c = advance();
- if (isAlpha(c)) return identifier();
- if (isDigit(c)) return number();
- switch (c) {
- case '(': return makeToken(TOKEN_LEFT_PAREN);
- case ')': return makeToken(TOKEN_RIGHT_PAREN);
- case '{': return makeToken(TOKEN_LEFT_BRACE);
- case '}': return makeToken(TOKEN_RIGHT_BRACE);
- case ';': return makeToken(TOKEN_SEMICOLON);
- case ',': return makeToken(TOKEN_COMMA);
- case '.': return makeToken(TOKEN_DOT);
- case '-': return makeToken(TOKEN_MINUS);
- case '+': return makeToken(TOKEN_PLUS);
- case '/': return makeToken(TOKEN_SLASH);
- case '*': return makeToken(TOKEN_STAR);
- case '!': return makeToken(match('=') ? TOKEN_BANG_EQUAL : TOKEN_BANG);
- case '=': return makeToken(match('=') ? TOKEN_EQUAL_EQUAL : TOKEN_EQUAL);
- case '<': return makeToken(match('=') ? TOKEN_LESS_EQUAL : TOKEN_LESS);
- case '>': return makeToken(match('=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER);
- case '"': return string();
- }
- return errorToken("Unexpected character.");
- }
|