소스 검색

Scanner and Compiler

runningwater 2 년 전
부모
커밋
0b8e8296ee
9개의 변경된 파일367개의 추가작업 그리고 28개의 파일을 삭제
  1. 2 0
      README.md
  2. 2 0
      common.h
  3. 39 0
      compiler.c
  4. 18 0
      compiler.h
  5. 58 22
      main.c
  6. 197 0
      scanner.c
  7. 45 0
      scanner.h
  8. 4 4
      vm.c
  9. 2 2
      vm.h

+ 2 - 0
README.md

@@ -0,0 +1,2 @@
+
+Start -> Source code -> SCANNER -> Tokens -> COMPILER -> Bytecode chunk -> VM

+ 2 - 0
common.h

@@ -9,6 +9,8 @@
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
 #define DEBUG_TRACE_EXECUTION
 

+ 39 - 0
compiler.c

@@ -0,0 +1,39 @@
+/**
+  ******************************************************************************
+  * @file           : compiler.c
+  * @author         : simon
+  * @brief          : None
+  * @attention      : None
+  * @date           : 2023/8/17
+  ******************************************************************************
+  */
+#include "common.h"
+#include "compiler.h"
+#include "scanner.h"
+
+/// \brief Scan `source` and print every token, one per line.
+///
+/// The line number is printed only when it differs from the previous token's
+/// line; otherwise a "|" continuation marker is shown. For example,
+/// `print 1 + 2;` is expected to produce:
+///   1 31 'print'
+///   | 21 '1'
+///   |  7 '+'
+///   | 21 '2'
+///   |  8 ';'
+///   2 39 ''       // <EOF token>
+/// \param source source code to scan
+void compile(const char *source) {
+  initScanner(source);
+
+  int previousLine = -1;
+  Token current;
+  do {
+    current = scanToken();
+
+    if (current.line == previousLine) {
+      printf("   | ");
+    } else {
+      printf("%4d ", current.line);
+      previousLine = current.line;
+    }
+
+    /// "%.*s" takes the precision from an int argument, so at most
+    /// `length` characters starting at `start` are printed. The lexeme is
+    /// not required to be NUL-terminated at that point.
+    printf("%2d '%.*s'\n", current.type, current.length, current.start);
+  } while (current.type != TOKEN_EOF);
+}

+ 18 - 0
compiler.h

@@ -0,0 +1,18 @@
+/**
+  ******************************************************************************
+  * @file           : compiler.h
+  * @author         : simon
+  * @brief          : None
+  * @attention      : None
+  * @date           : 2023/8/17
+  ******************************************************************************
+  */
+
+#ifndef CLOX__COMPILER_H_
+#define CLOX__COMPILER_H_
+
+/// \brief Compile the given source code.
+/// \param source source code
+void compile(const char *source);
+
+#endif //CLOX__COMPILER_H_

+ 58 - 22
main.c

@@ -1,8 +1,9 @@
 #include "common.h"
-#include "chunk.h"
-#include "debug.h"
 #include "vm.h"
 
+static void repl();
+static void runFile(const char *);
+static char *readFile(const char *path);
 /*!
  * @brief Program entry point
  * @param argc
@@ -11,31 +12,66 @@
  */
 int main(int argc, char *argv[]) {
   initVM();
-  Chunk chunk;
-  initChunk(&chunk);
 
-  int constant = addConstant(&chunk, 1.2);
-  writeChunk(&chunk, OP_CONSTANT, 123);
-  writeChunk(&chunk, constant, 123);
+  if (argc == 1) { // no arguments: start the interactive prompt
+    repl();
+  } else if (argc == 2) { // one argument: treat it as the path of a script
+    runFile(argv[1]);
+  } else {
+    fprintf(stderr, "Usage: clox [path]\n");
+    exit(64); // NOTE(review): presumably EX_USAGE from sysexits.h - confirm
+  }
 
-  constant = addConstant(&chunk, 3.4);
-  writeChunk(&chunk, OP_CONSTANT, 123);
-  writeChunk(&chunk, constant, 123);
+  freeVM();
+  return 0;
+}
+/// \brief Load the file at `path`, interpret its contents, and exit with a
+///        non-zero status when interpretation reports an error.
+/// \param path path of the script to run
+static void runFile(const char *path) {
+  char *source = readFile(path);
+  const InterpretResult outcome = interpret(source);
+  free(source);
+
+  switch (outcome) {
+    case INTERPRET_COMPILE_ERROR: exit(65);
+    case INTERPRET_RUNTIME_ERROR: exit(70);
+    default: break;
+  }
+}
+/// \brief Read a whole file into a newly allocated, NUL-terminated buffer.
+///
+/// On any failure (open, size query, allocation, short read) a message is
+/// written to stderr and the process exits with status 74.
+/// \param path path of the file to read
+/// \return heap buffer holding the file contents followed by '\0'; the
+///         caller owns it and must free() it
+static char *readFile(const char *path) {
+  FILE *file = fopen(path, "rb");
+  if (file == NULL) {
+    fprintf(stderr, "Could not open file \"%s\".\n", path);
+    exit(74);
+  }
 
-  writeChunk(&chunk, OP_ADD, 123);
+  // Determine the size by seeking to the end. Both calls can fail (for
+  // example on a non-seekable stream); ftell() then returns -1L, which must
+  // not be converted to size_t and used as an allocation size.
+  long endOffset = -1L;
+  if (fseek(file, 0L, SEEK_END) == 0) {
+    endOffset = ftell(file);
+  }
+  if (endOffset < 0L) {
+    fprintf(stderr, "Could not read file \"%s\".\n", path);
+    fclose(file);
+    exit(74);
+  }
+  size_t fileSize = (size_t) endOffset;
+  rewind(file);
 
-  constant = addConstant(&chunk, 5.6);
-  writeChunk(&chunk, OP_CONSTANT, 123);
-  writeChunk(&chunk, constant, 123);
+  // One extra byte for the terminating '\0'.
+  char *buffer = (char *) malloc(fileSize + 1);
+  if (buffer == NULL) {
+    fprintf(stderr, "Not enough memory to read \"%s\".\n", path);
+    fclose(file);
+    exit(74);
+  }
+  size_t bytesRead = fread(buffer, sizeof(char), fileSize, file);
+  if (bytesRead < fileSize) {
+    fprintf(stderr, "Could not read file \"%s\".\n", path);
+    free(buffer);
+    fclose(file);
+    exit(74);
+  }
+  buffer[bytesRead] = '\0';
 
-  writeChunk(&chunk, OP_DIVIDE, 123);
-  writeChunk(&chunk, OP_NEGATE, 123);
+  fclose(file);
+  return buffer;
+}
+static void repl() {
+  char line[1024]; // one chunk of input; a longer line arrives over several fgets calls
+  for (;;) {
+    printf("> ");
 
-  writeChunk(&chunk, OP_RETURN, 123);
+    if (!fgets(line, sizeof(line), stdin)) { // NULL: end of input or read error
+      printf("\n");
+      break;
+    }
 
-  disassembleChunk(&chunk, "test chunk");
-  interpret(&chunk);
-  freeVM();
-  freeChunk(&chunk);
-  return 0;
+    interpret(line); // the result is ignored; the prompt simply continues
+  }
 }

+ 197 - 0
scanner.c

@@ -0,0 +1,197 @@
+/**
+  ******************************************************************************
+  * @file           : scanner.c
+  * @author         : simon
+  * @brief          : None
+  * @attention      : None
+  * @date           : 2023/8/17
+  ******************************************************************************
+  */
+#include <stdbool.h>
+#include "scanner.h"
+#include "common.h"
+
+typedef struct {
+  const char *start; // first character of the lexeme currently being scanned
+  const char *current; // next character to be consumed (the one peek() returns)
+  int line; // current line number; starts at 1 and grows on each '\n'
+} Scanner;
+
+Scanner scanner; // the single scanner state shared by the functions in this file
+
+/// \brief Reset the scanner so that scanning begins at the start of `source`
+///        on line 1.
+/// \param source source code to scan
+void initScanner(const char *source) {
+  scanner.line = 1;
+  scanner.start = scanner.current = source;
+}
+/// \brief Report whether `c` is an ASCII letter or an underscore.
+/// \param c character to classify
+/// \return true for 'a'-'z', 'A'-'Z' and '_'
+static bool isAlpha(char c) {
+  if (c == '_') return true;
+  const bool isLower = 'a' <= c && c <= 'z';
+  const bool isUpper = 'A' <= c && c <= 'Z';
+  return isLower || isUpper;
+}
+/// \brief Report whether `c` is an ASCII decimal digit.
+/// \param c character to classify
+/// \return true for '0'-'9'
+static bool isDigit(char c) {
+  return !(c < '0' || c > '9');
+}
+/// \brief Report whether the scanner has reached the terminating '\0'.
+static bool isAtEnd() {
+  return scanner.current[0] == '\0';
+}
+/// \brief Consume the current character and return it.
+static char advance() {
+  const char consumed = *scanner.current;
+  scanner.current++;
+  return consumed;
+}
+/// \brief Return the current character without consuming it.
+static char peek() {
+  return scanner.current[0];
+}
+/// \brief Return the character after the current one without consuming
+///        anything, or '\0' when the scanner is already at the end.
+static char peekNext() {
+  return isAtEnd() ? '\0' : *(scanner.current + 1);
+}
+/// \brief Consume the current character only if it equals `expected`.
+/// \param expected character to look for
+/// \return true when the character matched and was consumed
+static bool match(int expected) {
+  if (isAtEnd() || *scanner.current != expected) return false;
+  scanner.current++;
+  return true;
+}
+/// \brief Build a token of the given type covering the current lexeme
+///        (from scanner.start up to, but excluding, scanner.current).
+/// \param type kind of token to create
+/// \return token whose start/length describe the lexeme and whose line is
+///         the scanner's current line
+static Token makeToken(TokenType type) {
+  Token token;
+  token.type = type;
+  token.start = scanner.start;
+  token.length = (int) (scanner.current - scanner.start);
+  // The line number belongs in `line`. Storing it in `length` would clobber
+  // the lexeme length and leave `line` uninitialized.
+  token.line = scanner.line;
+  return token;
+}
+/// \brief Build a TOKEN_ERROR token whose text is `message` rather than a
+///        slice of the source.
+/// \param message NUL-terminated error description; the token stores the
+///        pointer, so it must outlive the token
+/// \return error token tagged with the scanner's current line
+static Token errorToken(const char *message) {
+  Token token;
+  token.type = TOKEN_ERROR;
+  token.start = message;
+  token.length = (int) strlen(message);
+  // The line number belongs in `line`. Storing it in `length` would clobber
+  // the message length and leave `line` uninitialized.
+  token.line = scanner.line;
+  return token;
+}
+
+/// \brief Advance past spaces, tabs, carriage returns, newlines and
+///        "//" line comments, counting each newline that is consumed.
+static void skipWhitespace() {
+  for (;;) {
+    const char next = peek();
+
+    if (next == ' ' || next == '\r' || next == '\t') {
+      advance();
+    } else if (next == '\n') {
+      scanner.line++;
+      advance();
+    } else if (next == '/' && peekNext() == '/') {
+      // A comment runs to the end of the line; the newline itself is left
+      // for the next iteration so that the line counter is updated.
+      while (peek() != '\n' && !isAtEnd()) advance();
+    } else {
+      return;
+    }
+  }
+}
+/// \brief Compare the current lexeme against one keyword.
+/// \param length length of `keyStr` in characters
+/// \param keyStr keyword text to compare with
+/// \param type token type to return on an exact match
+/// \return `type` when the lexeme equals `keyStr`, otherwise TOKEN_IDENTIFIER
+static TokenType checkKeyword(int length, const char *keyStr,
+                              TokenType type) {
+  const bool sameLength = (scanner.current - scanner.start) == length;
+  if (!sameLength) return TOKEN_IDENTIFIER;
+
+  return memcmp(scanner.start, keyStr, length) == 0 ? type : TOKEN_IDENTIFIER;
+}
+/// \brief Classify the current lexeme as a keyword or a plain identifier.
+/// \return the keyword's token type when the lexeme is exactly a keyword,
+///         otherwise TOKEN_IDENTIFIER
+static TokenType identifierType() {
+  static const struct {
+    const char *text;
+    int length;
+    TokenType type;
+  } keywords[] = {
+      {"and", 3, TOKEN_AND},
+      {"class", 5, TOKEN_CLASS},
+      {"else", 4, TOKEN_ELSE},
+      {"false", 5, TOKEN_FALSE},
+      {"for", 3, TOKEN_FOR},
+      {"fun", 3, TOKEN_FUN},
+      {"if", 2, TOKEN_IF},
+      {"nil", 3, TOKEN_NIL},
+      {"or", 2, TOKEN_OR},
+      {"print", 5, TOKEN_PRINT},
+      {"return", 6, TOKEN_RETURN},
+      {"super", 5, TOKEN_SUPER},
+      {"this", 4, TOKEN_THIS},
+      {"true", 4, TOKEN_TRUE},
+      {"var", 3, TOKEN_VAR},
+      {"while", 5, TOKEN_WHILE},
+  };
+  const int keywordCount = (int) (sizeof(keywords) / sizeof(keywords[0]));
+
+  // Keywords are distinct, so at most one entry can equal the lexeme.
+  for (int i = 0; i < keywordCount; i++) {
+    const TokenType found =
+        checkKeyword(keywords[i].length, keywords[i].text, keywords[i].type);
+    if (found != TOKEN_IDENTIFIER) return found;
+  }
+  return TOKEN_IDENTIFIER;
+}
+/// \brief Consume the rest of an identifier or keyword (letters, digits,
+///        underscores) and return its token.
+static Token identifier() {
+  for (;;) {
+    const char next = peek();
+    if (!isAlpha(next) && !isDigit(next)) break;
+    advance();
+  }
+  return makeToken(identifierType());
+}
+/// \brief Consume the rest of a number literal: digits, optionally followed
+///        by a '.' and more digits.
+/// \return a TOKEN_NUMBER token covering the literal
+static Token number() {
+  // Integer part.
+  while (isDigit(peek())) {
+    advance();
+  }
+
+  // Fractional part: the '.' only belongs to the number when a digit
+  // follows it.
+  const bool hasFraction = peek() == '.' && isDigit(peekNext());
+  if (hasFraction) {
+    advance();  // consume the "."
+    while (isDigit(peek())) {
+      advance();
+    }
+  }
+
+  return makeToken(TOKEN_NUMBER);
+}
+/// \brief Consume a string literal up to and including its closing quote.
+///        Newlines inside the literal are allowed and counted.
+/// \return a TOKEN_STRING token, or an error token when the input ends
+///         before the closing quote
+static Token string() {
+  for (;;) {
+    if (peek() == '"' || isAtEnd()) break;
+    if (peek() == '\n') scanner.line++;
+    advance();
+  }
+
+  if (!isAtEnd()) {
+    advance();  // the closing quote
+    return makeToken(TOKEN_STRING);
+  }
+  return errorToken("unterminated string.");
+}
+/// \brief Skip leading whitespace and comments, then scan and return the
+///        next token.
+/// \return the next token; TOKEN_EOF at the end of the input, or an error
+///         token for a character that starts no known token
+Token scanToken() {
+  skipWhitespace();
+  scanner.start = scanner.current;
+
+  if (isAtEnd()) return makeToken(TOKEN_EOF);
+
+  const char first = advance();
+  if (isAlpha(first)) return identifier();
+  if (isDigit(first)) return number();
+
+  TokenType kind;
+  switch (first) {
+    case '(': kind = TOKEN_LEFT_PAREN; break;
+    case ')': kind = TOKEN_RIGHT_PAREN; break;
+    case '{': kind = TOKEN_LEFT_BRACE; break;
+    case '}': kind = TOKEN_RIGHT_BRACE; break;
+    case ';': kind = TOKEN_SEMICOLON; break;
+    case ',': kind = TOKEN_COMMA; break;
+    case '.': kind = TOKEN_DOT; break;
+    case '-': kind = TOKEN_MINUS; break;
+    case '+': kind = TOKEN_PLUS; break;
+    case '/': kind = TOKEN_SLASH; break;
+    case '*': kind = TOKEN_STAR; break;
+    // One- or two-character operators: a following '=' is consumed too.
+    case '!': kind = match('=') ? TOKEN_BANG_EQUAL : TOKEN_BANG; break;
+    case '=': kind = match('=') ? TOKEN_EQUAL_EQUAL : TOKEN_EQUAL; break;
+    case '<': kind = match('=') ? TOKEN_LESS_EQUAL : TOKEN_LESS; break;
+    case '>': kind = match('=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER; break;
+    case '"': return string();
+    default: return errorToken("Unexpected character.");
+  }
+  return makeToken(kind);
+}

+ 45 - 0
scanner.h

@@ -0,0 +1,45 @@
+/**
+  ******************************************************************************
+  * @file           : scanner.h
+  * @author         : simon
+  * @brief          : None
+  * @attention      : None
+  * @date           : 2023/8/17
+  ******************************************************************************
+  */
+
+#ifndef CLOX__SCANNER_H_
+#define CLOX__SCANNER_H_
+/// Kinds of token produced by the scanner. The order of the enumerators
+/// fixes their numeric values, which appear in the token dump.
+typedef enum {
+  /// Single-character tokens.
+  TOKEN_LEFT_PAREN,
+  TOKEN_RIGHT_PAREN,
+  TOKEN_LEFT_BRACE,
+  TOKEN_RIGHT_BRACE,
+  TOKEN_COMMA,
+  TOKEN_DOT,
+  TOKEN_MINUS,
+  TOKEN_PLUS,
+  TOKEN_SEMICOLON,
+  TOKEN_SLASH,
+  TOKEN_STAR,
+  /// One or two character tokens.
+  TOKEN_BANG,
+  TOKEN_BANG_EQUAL,
+  TOKEN_EQUAL,
+  TOKEN_EQUAL_EQUAL,
+  TOKEN_GREATER,
+  TOKEN_GREATER_EQUAL,
+  TOKEN_LESS,
+  TOKEN_LESS_EQUAL,
+  /// Literals.
+  TOKEN_IDENTIFIER,
+  TOKEN_STRING,
+  TOKEN_NUMBER,
+  /// Keywords.
+  TOKEN_AND,
+  TOKEN_CLASS,
+  TOKEN_ELSE,
+  TOKEN_FALSE,
+  TOKEN_FOR,
+  TOKEN_FUN,
+  TOKEN_IF,
+  TOKEN_NIL,
+  TOKEN_OR,
+  TOKEN_PRINT,
+  TOKEN_RETURN,
+  TOKEN_SUPER,
+  TOKEN_THIS,
+  TOKEN_TRUE,
+  TOKEN_VAR,
+  TOKEN_WHILE,
+
+  /// Scanner-generated markers.
+  TOKEN_ERROR,
+  TOKEN_EOF
+} TokenType;
+
+typedef struct {
+  TokenType type; // kind of token
+  const char *start; // first character of the token text (the message for TOKEN_ERROR)
+  int length; // number of characters of text at `start`
+  int line; // source line number of the token
+} Token;
+
+void initScanner(const char *source); // begin scanning at the start of `source`
+Token scanToken(); // scan and return the next token; TOKEN_EOF at end of input
+
+#endif //CLOX__SCANNER_H_

+ 4 - 4
vm.c

@@ -10,6 +10,7 @@
 #include "common.h"
 #include "vm.h"
 #include "debug.h"
+#include "compiler.h"
 
 VM vm;
 
@@ -81,10 +82,9 @@ static InterpretResult run() {
 void freeVM() {
 
 }
-InterpretResult interpret(Chunk *chunk) {
-  vm.chunk = chunk;
-  vm.ip = vm.chunk->code;
-  return run();
+InterpretResult interpret(const char *source) {
+  compile(source); // compile() returns void, so no failure can be reported yet
+  return INTERPRET_OK; // run() is not invoked from here at this stage
 }
 void push(Value value) {
   *vm.stackTop = value;

+ 2 - 2
vm.h

@@ -31,9 +31,9 @@ typedef enum {
 void initVM();
 void freeVM();
 /// \brief interpret: execute instructions
-/// \param chunk 指令块
+/// \param source source code
 /// \return InterpretResult
-InterpretResult interpret(Chunk *chunk);
+InterpretResult interpret(const char *source);
 void push(Value value);
 Value pop();