add ast_print

2025-02-24 22:12:13 -06:00 · 2025-02-24 22:12:13 -06:00 · 396b51b3a7
commit 396b51b3a7
parent 19d79a7914
8 changed files with 135 additions and 25 deletions
--- a/ast.h
+++ b/ast.h
@ -1,6 +1,6 @@
 #pragma once
-struct token;
+#include "lexer.h"
 struct expression {
  struct token constant;
@ -20,7 +20,7 @@ struct statement_return {
 struct statement_if_else {
  struct expression * expression;
  struct statement * statement_if;
-  struct statement * statement_else;
+  struct statement * statement_else; // might be nullptr
 };
 struct statement {
@ -32,7 +32,7 @@ struct statement {
 };
 struct function_definition {
-  struct token * name;
+  struct token name;
  struct statement * statements;
 };
--- a/ast_print.c
+++ b/ast_print.c
@ -0,0 +1,106 @@
 #include "assert.h"
 #include "printf.h"
 #include "ast_print.h"
 /* Emit two space characters per indentation level; a level <= 0 prints nothing. */
 static void print_indent(int indent)
 {
  int level = 0;
  while (level < indent) {
    print_char(' ');
    print_char(' ');
    level += 1;
  }
 }
 /*
  * printi(indent, fmt, ...): write `indent` levels of indentation with
  * print_indent(), then forward the remaining arguments to _printf().
  *
  * The body is wrapped in do { ... } while (0) WITHOUT a trailing semicolon:
  * the semicolon written at the call site completes the statement, so the
  * macro behaves like a single statement even as the unbraced body of an
  * if/else.
  */
 #define printi(indent, ...)                     \
  do {                                          \
    print_indent(indent);                       \
    _printf(__VA_ARGS__);                       \
  } while (0)
 /*
  * Return the enumerator name of a statement type as a string.
  * Asserts that the value indexes inside the lookup table.
  */
 const char * statement_type_str(enum statement_type type)
 {
  static const char * names[] = {
    [STATEMENT_IF_ELSE] = "STATEMENT_IF_ELSE",
    [STATEMENT_RETURN] = "STATEMENT_RETURN",
  };
  const int index = (int)type;

  assert(index < ((sizeof (names)) / (sizeof (names[0]))));
  return names[index];
 }
 /*
  * Return the enumerator name of a token type as a string.
  * Asserts that the value indexes inside the lookup table.
  */
 const char * token_type_str(enum token_type type)
 {
  static const char * names[] = {
    /* special tokens */
    [TOKEN_INVALID] = "TOKEN_INVALID",
    [TOKEN_EOF] = "TOKEN_EOF",
    /* identifiers and literals */
    [TOKEN_IDENTIFIER] = "TOKEN_IDENTIFIER",
    [TOKEN_CONSTANT] = "TOKEN_CONSTANT",
    /* keywords */
    [TOKEN_INT] = "TOKEN_INT",
    [TOKEN_VOID] = "TOKEN_VOID",
    [TOKEN_RETURN] = "TOKEN_RETURN",
    /* punctuation */
    [TOKEN_LPAREN] = "TOKEN_LPAREN",
    [TOKEN_RPAREN] = "TOKEN_RPAREN",
    [TOKEN_LBRACE] = "TOKEN_LBRACE",
    [TOKEN_RBRACE] = "TOKEN_RBRACE",
    [TOKEN_SEMICOLON] = "TOKEN_SEMICOLON",
  };
  const int index = (int)type;

  assert(index < ((sizeof (names)) / (sizeof (names[0]))));
  return names[index];
 }
 /*
  * Print one token on a single indented line: its type name, then the bytes
  * in the half-open range [token.start, token.end), then a newline.
  */
 void print_token(int indent, struct token token)
 {
  printi(indent, "type=%s str=", token_type_str(token.type));

  for (const uint8_t * cursor = token.start; cursor < token.end; cursor += 1) {
    print_char((char)*cursor);
  }

  print_char('\n');
 }
 /*
  * Print an expression as a "token=(" ... ")" group whose single child,
  * one level deeper, is the expression's constant token.
  */
 void print_expression(int indent, struct expression * expression)
 {
  const int inner = indent + 1;

  printi(indent, "token=(\n");
  print_token(inner, expression->constant);
  printi(indent, ")\n");
 }
 /*
  * Print a return statement body as an "expression=(" ... ")" group whose
  * child, one level deeper, is the returned expression.
  */
 void print_return(int indent, struct statement_return * statement_return)
 {
  const int inner = indent + 1;

  printi(indent, "expression=(\n");
  print_expression(inner, statement_return->expression);
  printi(indent, ")\n");
 }
 /*
  * Print an if/else statement body as three consecutive groups: the
  * condition expression, the "if" branch, and the "else" branch. When there
  * is no else branch, the literal text "nullptr" is printed in its place.
  */
 void print_if_else(int indent, struct statement_if_else * statement_if_else)
 {
  const int inner = indent + 1;

  // condition
  printi(indent, "expression=(\n");
  print_expression(inner, statement_if_else->expression);
  printi(indent, ")\n");

  // taken branch
  printi(indent, "statement_if=(\n");
  print_statement(inner, statement_if_else->statement_if);
  printi(indent, ")\n");

  // optional else branch
  printi(indent, "statement_else=(\n");
  struct statement * else_branch = statement_if_else->statement_else;
  if (else_branch != nullptr) {
    print_statement(inner, else_branch);
  } else {
    printi(inner, "nullptr\n");
  }
  printi(indent, ")\n");
 }
 /*
  * Print a statement as a "statement type=<NAME> (" ... ")" group and
  * dispatch to the printer matching its type. Any type other than
  * STATEMENT_RETURN or STATEMENT_IF_ELSE trips an assertion.
  */
 void print_statement(int indent, struct statement * statement)
 {
  const int inner = indent + 1;

  printi(indent, "statement type=%s (\n", statement_type_str(statement->type));

  if (statement->type == STATEMENT_RETURN) {
    print_return(inner, statement->statement_return);
  } else if (statement->type == STATEMENT_IF_ELSE) {
    print_if_else(inner, statement->statement_if_else);
  } else {
    assert(false);
  }

  printi(indent, ")\n");
 }
 void print_function_definition(int indent, struct statement * statement)
 {
  printi(indent, "function_definition name=");
  print_token(statement->name);
 }
--- a/ast_print.h
+++ b/ast_print.h
@ -0,0 +1,12 @@
 #pragma once
 #include "ast.h"
 const char * statement_type_str(enum statement_type type);
 const char * token_type_str(enum token_type type);
 void print_token(int indent, struct token token);
 void print_expression(int indent, struct expression * expression);
 void print_return(int indent, struct statement_return * statement_return);
 void print_if_else(int indent, struct statement_if_else * statement_if_else);
 void print_statement(int indent, struct statement * statement);
--- a/compiler.mk
+++ b/compiler.mk
@ -5,4 +5,5 @@ OBJ = \
 	printf.o \
 	string_parse.o \
 	string_unparse.o \
 	ast_print.o \
 	main_hosted.o
--- a/lexer.c
+++ b/lexer.c
@ -80,12 +80,14 @@ static inline enum token_type find_keyword(const uint8_t * buf, int start, int e
 struct token lexer_next_token(struct lexer_state * state)
 {
  struct token token;
  int token_start;
  int token_end;
  while (state->offset < state->size && is_whitespace(state->buf[state->offset])) {
    state->offset += 1;
  }
-  token.start = state->offset;
+  token_start = state->offset;
  uint8_t c = state->buf[state->offset++];
@ -108,7 +110,7 @@ struct token lexer_next_token(struct lexer_state * state)
          break;
        state->offset += 1;
      }
-      token.type = find_keyword(state->buf, token.start, state->offset);
+      token.type = find_keyword(state->buf, token_start, state->offset);
    } else if (is_decimal_digit(c)) {
      while (state->offset < state->size) {
        uint8_t c = state->buf[state->offset];
@ -128,10 +130,13 @@ struct token lexer_next_token(struct lexer_state * state)
    break;
  }
-  token.end = state->offset;
+  token_end = state->offset;
-  if (token.start >= state->size)
+  if (token_start >= state->size)
    token.type = TOKEN_EOF;
  token.start = &state->buf[token_start];
  token.end = &state->buf[token_end];
  return token;
 }
--- a/lexer.h
+++ b/lexer.h
@ -21,8 +21,8 @@ enum token_type {
 struct token {
  enum token_type type;
-  int start;
+  const uint8_t * start;
-  int end;
+  const uint8_t * end;
  int value;
 };
--- a/main_hosted.c
+++ b/main_hosted.c
@ -4,6 +4,7 @@
 #include <stdlib.h>
 #include "lexer.h"
 #include "ast_print.h"
 int read_file(const char * filename, uint8_t ** buf)
 {
@ -50,21 +51,6 @@ int read_file(const char * filename, uint8_t ** buf)
  return size;
 }
 const char * token_str[] = {
  [TOKEN_INVALID] = "TOKEN_INVALID",
  [TOKEN_EOF] = "TOKEN_EOF",
  [TOKEN_IDENTIFIER] = "TOKEN_IDENTIFIER",
  [TOKEN_CONSTANT] = "TOKEN_CONSTANT",
  [TOKEN_INT] = "TOKEN_INT",
  [TOKEN_VOID] = "TOKEN_VOID",
  [TOKEN_RETURN] = "TOKEN_RETURN",
  [TOKEN_LPAREN] = "TOKEN_LPAREN",
  [TOKEN_RPAREN] = "TOKEN_RPAREN",
  [TOKEN_LBRACE] = "TOKEN_LBRACE",
  [TOKEN_RBRACE] = "TOKEN_RBRACE",
  [TOKEN_SEMICOLON] = "TOKEN_SEMICOLON",
 };
 int main(int argc, char * argv[])
 {
  if (argc != 3) {
@ -89,7 +75,7 @@ int main(int argc, char * argv[])
  while (true) {
    struct token token = lexer_next_token(&lexer_state);
-    printf("%s\n", token_str[token.type]);
+    printf("%s\n", token_type_str(token.type));
    if (token.type == TOKEN_INVALID)
      return EXIT_FAILURE;
    if (token.type == TOKEN_EOF)
--- a/BIN
+++ b/BIN