Add an AST printer (ast_print.c / ast_print.h) and make tokens carry
pointers into the source buffer instead of integer offsets.

Notes for whoever applies this patch:
  - Indentation of context and removed lines was reconstructed (two spaces,
    a tab in compiler.mk); apply with whitespace-insensitive matching
    (git apply --ignore-whitespace, or patch -l) if a hunk is rejected.
  - The first main_hosted.c hunk starts at the "lexer.h" include because the
    context line before it lost its header name.
  - The last main_hosted.c hunk carries no leading context because the
    position of a blank line there could not be recovered.
  - The index lines still carry the blob ids of the original patch.

diff --git a/ast.h b/ast.h
index e0150e2..e0ca053 100644
--- a/ast.h
+++ b/ast.h
@@ -1,6 +1,6 @@
 #pragma once
 
-struct token;
+#include "lexer.h"
 
 struct expression {
   struct token constant;
@@ -20,7 +20,7 @@ struct statement_return {
 struct statement_if_else {
   struct expression * expression;
   struct statement * statement_if;
-  struct statement * statement_else;
+  struct statement * statement_else; // might be nullptr
 };
 
 struct statement {
@@ -32,7 +32,7 @@ struct statement {
 };
 
 struct function_definition {
-  struct token * name;
+  struct token name;
   struct statement * statements;
 };
 
diff --git a/ast_print.c b/ast_print.c
new file mode 100644
index 0000000..51e57d3
--- /dev/null
+++ b/ast_print.c
@@ -0,0 +1,130 @@
+#include "assert.h"
+#include "printf.h"
+#include "ast_print.h"
+
+// Emit two spaces per indentation level.
+static void print_indent(int indent)
+{
+  for (int i = 0; i < indent; i++) {
+    print_char(' ');
+    print_char(' ');
+  }
+}
+
+// Print the indentation, then a _printf-formatted message.
+// There is deliberately no semicolon after while (0): the caller's own
+// semicolon completes the statement, so printi(...); is exactly one statement.
+#define printi(indent, ...) \
+  do { \
+    print_indent(indent); \
+    _printf(__VA_ARGS__); \
+  } while (0)
+
+// Return the name of a statement type.
+// Asserts that the type has an entry in the name table.
+const char * statement_type_str(enum statement_type type)
+{
+  static const char * const map[] = {
+    [STATEMENT_RETURN] = "STATEMENT_RETURN",
+    [STATEMENT_IF_ELSE] = "STATEMENT_IF_ELSE",
+  };
+  const unsigned int count = (sizeof (map)) / (sizeof (map[0]));
+  const unsigned int index = (unsigned int)type;
+  assert(index < count);
+  // designated initializers leave unlisted entries as null pointers
+  assert(map[index] != nullptr);
+  return map[index];
+}
+
+// Return the name of a token type.
+// Asserts that the type has an entry in the name table.
+const char * token_type_str(enum token_type type)
+{
+  static const char * const map[] = {
+    [TOKEN_INVALID] = "TOKEN_INVALID",
+    [TOKEN_EOF] = "TOKEN_EOF",
+    [TOKEN_IDENTIFIER] = "TOKEN_IDENTIFIER",
+    [TOKEN_CONSTANT] = "TOKEN_CONSTANT",
+    [TOKEN_INT] = "TOKEN_INT",
+    [TOKEN_VOID] = "TOKEN_VOID",
+    [TOKEN_RETURN] = "TOKEN_RETURN",
+    [TOKEN_LPAREN] = "TOKEN_LPAREN",
+    [TOKEN_RPAREN] = "TOKEN_RPAREN",
+    [TOKEN_LBRACE] = "TOKEN_LBRACE",
+    [TOKEN_RBRACE] = "TOKEN_RBRACE",
+    [TOKEN_SEMICOLON] = "TOKEN_SEMICOLON",
+  };
+  const unsigned int count = (sizeof (map)) / (sizeof (map[0]));
+  const unsigned int index = (unsigned int)type;
+  assert(index < count);
+  // designated initializers leave unlisted entries as null pointers
+  assert(map[index] != nullptr);
+  return map[index];
+}
+
+// Print one line: the token type, then the bytes in [token.start, token.end).
+void print_token(int indent, struct token token)
+{
+  printi(indent, "type=%s str=", token_type_str(token.type));
+  for (const uint8_t * bufi = token.start; bufi < token.end; bufi++) {
+    print_char((char)*bufi);
+  }
+  print_char('\n');
+}
+
+// Print the constant token of an expression.
+void print_expression(int indent, struct expression * expression)
+{
+  printi(indent, "token=(\n");
+  print_token(indent + 1, expression->constant);
+  printi(indent, ")\n");
+}
+
+// Print the expression of a return statement.
+void print_return(int indent, struct statement_return * statement_return)
+{
+  printi(indent, "expression=(\n");
+  print_expression(indent + 1, statement_return->expression);
+  printi(indent, ")\n");
+}
+
+// Print the condition and both branches of an if/else statement.
+// A missing else branch is printed as "nullptr".
+void print_if_else(int indent, struct statement_if_else * statement_if_else)
+{
+  printi(indent, "expression=(\n");
+  print_expression(indent + 1, statement_if_else->expression);
+  printi(indent, ")\n");
+  printi(indent, "statement_if=(\n");
+  print_statement(indent + 1, statement_if_else->statement_if);
+  printi(indent, ")\n");
+  printi(indent, "statement_else=(\n");
+  if (statement_if_else->statement_else == nullptr) {
+    printi(indent + 1, "nullptr\n");
+  } else {
+    print_statement(indent + 1, statement_if_else->statement_else);
+  }
+  printi(indent, ")\n");
+}
+
+// Print a statement header, then dispatch on the statement type.
+void print_statement(int indent, struct statement * statement)
+{
+  printi(indent, "statement type=%s (\n", statement_type_str(statement->type));
+  switch (statement->type) {
+  case STATEMENT_RETURN: print_return(indent + 1, statement->statement_return); break;
+  case STATEMENT_IF_ELSE: print_if_else(indent + 1, statement->statement_if_else); break;
+  default:
+    assert(false);
+    break;
+  }
+  printi(indent, ")\n");
+}
+
+// Print the function name token on a single line.
+void print_function_definition(int indent, struct function_definition * function_definition)
+{
+  printi(indent, "function_definition name=");
+  // the indentation was already emitted above, so the token continues the line
+  print_token(0, function_definition->name);
+}
diff --git a/ast_print.h b/ast_print.h
new file mode 100644
index 0000000..e24f843
--- /dev/null
+++ b/ast_print.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "ast.h"
+
+// names of enum values, for debug output
+const char * statement_type_str(enum statement_type type);
+const char * token_type_str(enum token_type type);
+
+// AST printers; indent is the nesting depth (two spaces per level)
+void print_token(int indent, struct token token);
+void print_expression(int indent, struct expression * expression);
+void print_return(int indent, struct statement_return * statement_return);
+void print_if_else(int indent, struct statement_if_else * statement_if_else);
+void print_statement(int indent, struct statement * statement);
+void print_function_definition(int indent, struct function_definition * function_definition);
diff --git a/compiler.mk b/compiler.mk
index 65cb20d..44bf0dd 100644
--- a/compiler.mk
+++ b/compiler.mk
@@ -5,4 +5,5 @@ OBJ = \
 	printf.o \
 	string_parse.o \
 	string_unparse.o \
+	ast_print.o \
 	main_hosted.o
diff --git a/lexer.c b/lexer.c
index 3251349..5922298 100644
--- a/lexer.c
+++ b/lexer.c
@@ -80,12 +80,14 @@ static inline enum token_type find_keyword(const uint8_t * buf, int start, int e
 struct token lexer_next_token(struct lexer_state * state)
 {
   struct token token;
+  int token_start;
+  int token_end;
 
   while (state->offset < state->size && is_whitespace(state->buf[state->offset])) {
     state->offset += 1;
   }
 
-  token.start = state->offset;
+  token_start = state->offset;
 
   uint8_t c = state->buf[state->offset++];
 
@@ -108,7 +110,7 @@ struct token lexer_next_token(struct lexer_state * state)
         break;
       state->offset += 1;
     }
-    token.type = find_keyword(state->buf, token.start, state->offset);
+    token.type = find_keyword(state->buf, token_start, state->offset);
   } else if (is_decimal_digit(c)) {
     while (state->offset < state->size) {
       uint8_t c = state->buf[state->offset];
@@ -128,10 +130,18 @@ struct token lexer_next_token(struct lexer_state * state)
     break;
   }
 
-  token.end = state->offset;
+  token_end = state->offset;
 
-  if (token.start >= state->size)
+  if (token_start >= state->size) {
     token.type = TOKEN_EOF;
+    // the EOF token is empty; offset was already advanced past size above,
+    // so pin both ends to size to keep the pointers inside the buffer bounds
+    token_start = state->size;
+    token_end = state->size;
+  }
 
+  token.start = &state->buf[token_start];
+  token.end = &state->buf[token_end];
+
   return token;
 }
diff --git a/lexer.h b/lexer.h
index 62b5f94..7155895 100644
--- a/lexer.h
+++ b/lexer.h
@@ -21,8 +21,8 @@ enum token_type {
 
 struct token {
   enum token_type type;
-  int start;
-  int end;
+  const uint8_t * start;
+  const uint8_t * end;
   int value;
 };
 
diff --git a/main_hosted.c b/main_hosted.c
index 47c9b95..ee5a8cb 100644
--- a/main_hosted.c
+++ b/main_hosted.c
@@ -6,4 +6,5 @@
 #include "lexer.h"
+#include "ast_print.h"
 
 int read_file(const char * filename, uint8_t ** buf)
 {
@@ -50,21 +51,6 @@ int read_file(const char * filename, uint8_t ** buf)
   return size;
 }
 
-const char * token_str[] = {
-  [TOKEN_INVALID] = "TOKEN_INVALID",
-  [TOKEN_EOF] = "TOKEN_EOF",
-  [TOKEN_IDENTIFIER] = "TOKEN_IDENTIFIER",
-  [TOKEN_CONSTANT] = "TOKEN_CONSTANT",
-  [TOKEN_INT] = "TOKEN_INT",
-  [TOKEN_VOID] = "TOKEN_VOID",
-  [TOKEN_RETURN] = "TOKEN_RETURN",
-  [TOKEN_LPAREN] = "TOKEN_LPAREN",
-  [TOKEN_RPAREN] = "TOKEN_RPAREN",
-  [TOKEN_LBRACE] = "TOKEN_LBRACE",
-  [TOKEN_RBRACE] = "TOKEN_RBRACE",
-  [TOKEN_SEMICOLON] = "TOKEN_SEMICOLON",
-};
-
 int main(int argc, char * argv[])
 {
   if (argc != 3) {
@@ -92,4 +78,4 @@ int main(int argc, char * argv[])
-    printf("%s\n", token_str[token.type]);
+    printf("%s\n", token_type_str(token.type));
     if (token.type == TOKEN_INVALID)
       return EXIT_FAILURE;
     if (token.type == TOKEN_EOF)
diff --git a/print_class b/print_class
index 7acecf9..7923c88 100755
Binary files a/print_class and b/print_class differ