diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6142305 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.o +*.d diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..16d8666 --- /dev/null +++ b/Makefile @@ -0,0 +1,43 @@ +%.csv: %.ods + libreoffice --headless --convert-to csv:"Text - txt - csv (StarCalc)":44,34,76,,,,true --outdir $(dir $@) $< + +include compiler.mk + +MAKEFILE_PATH := $(patsubst %/,%,$(dir $(abspath $(firstword $(MAKEFILE_LIST))))) +CC ?= gcc +ARCH = -m32 +CFLAGS += -Wall -Werror -Wfatal-errors -Wno-error=unused-variable -fstack-protector -std=c2x -g +CFLAGS += -I$(MAKEFILE_PATH)/ +CFLAGS += -I$(MAKEFILE_PATH)/c +CFLAGS += -DDEBUG +#CFLAGS += -DDEBUG_PRINT +LDFLAGS = -lm +OPT ?= -O0 +DEPFLAGS = -MMD -MP + +%.o: %.c + $(CC) $(ARCH) $(CFLAGS) $(OPT) $(DEPFLAGS) -MF ${<}.d -c $< -o $@ + +print_class: $(OBJ) $(PRINT_CLASS_OBJ) + $(CC) $(ARCH) $(LDFLAGS) $^ -o $@ + +main: $(OBJ) $(MAIN_HOSTED_OBJ) + $(CC) $(ARCH) $(LDFLAGS) $^ -o $@ + +clean: + rm -f main print_class *.elf *.bin + find -P \ + -regextype posix-egrep \ + -regex '.*\.(o|d|gch)$$' \ + -exec rm {} \; + +.SUFFIXES: +.INTERMEDIATE: +.SECONDARY: +.PHONY: all clean phony + +%: RCS/%,v +%: RCS/% +%: %,v +%: s.% +%: SCCS/s.% diff --git a/assert_hosted.h b/assert_hosted.h index 09dc5c0..599d08e 100644 --- a/assert_hosted.h +++ b/assert_hosted.h @@ -13,4 +13,4 @@ extern void __assert_fail (const char *__assertion, const char *__file, }) #define fail(expr) \ - (__assert_fail (#expr, __FILE__, __LINE__, __ASSERT_FUNCTION);) + (__assert_fail (#expr, __FILE__, __LINE__, __ASSERT_FUNCTION)) diff --git a/ast.h b/ast.h index 982c95d..e0150e2 100644 --- a/ast.h +++ b/ast.h @@ -3,12 +3,11 @@ struct token; struct expression { - struct token * constant; + struct token constant; }; enum statement_type { STATEMENT_RETURN, - STATEMENT_IF, STATEMENT_IF_ELSE, }; @@ -18,21 +17,16 @@ struct statement_return { struct expression * expression; }; -struct statement_if { - struct 
expression * expression; - struct statement * statement; -}; - struct statement_if_else { struct expression * expression; - struct statement * statement; + struct statement * statement_if; + struct statement * statement_else; }; struct statement { enum statement_type type; union { struct statement_return * statement_return; - struct statement_if * statement_if; struct statement_if_else * statement_if_else; }; }; diff --git a/compiler.mk b/compiler.mk new file mode 100644 index 0000000..65cb20d --- /dev/null +++ b/compiler.mk @@ -0,0 +1,8 @@ +OBJ = \ + lexer.o \ + parser.o \ + malloc.o \ + printf.o \ + string_parse.o \ + string_unparse.o \ + main_hosted.o diff --git a/lexer.c b/lexer.c index 2fe7774..3251349 100644 --- a/lexer.c +++ b/lexer.c @@ -40,6 +40,16 @@ static const struct keyword_desc keywords[] = { .length = 6, .token_type = TOKEN_RETURN, }, + { + .buf = (const uint8_t *)"if", + .length = 2, + .token_type = TOKEN_IF, /* "if" must lex as TOKEN_IF so parse_statement can match it */ + }, + { + .buf = (const uint8_t *)"else", + .length = 4, + .token_type = TOKEN_ELSE, /* "else" must lex as TOKEN_ELSE, not TOKEN_RETURN */ + }, }; static inline bool keyword_equal(const uint8_t * buf, int start, int end, const struct keyword_desc * keyword) diff --git a/lexer.h b/lexer.h index d7cd6b3..62b5f94 100644 --- a/lexer.h +++ b/lexer.h @@ -10,6 +10,8 @@ enum token_type { TOKEN_INT, TOKEN_VOID, TOKEN_RETURN, + TOKEN_IF, + TOKEN_ELSE, TOKEN_LPAREN, TOKEN_RPAREN, TOKEN_LBRACE, diff --git a/malloc.h b/malloc.h new file mode 100644 index 0000000..dca962f --- /dev/null +++ b/malloc.h @@ -0,0 +1,7 @@ +#pragma once + +#include <stdint.h> + +#define malloct(t) (malloc_class_arena((sizeof (t)))) + +void * malloc_class_arena(uint32_t size); diff --git a/minmax.h b/minmax.h new file mode 100644 index 0000000..df4c506 --- /dev/null +++ b/minmax.h @@ -0,0 +1,4 @@ +#pragma once + +#define min(a, b) ( (a < b) ? a : b ) +#define max(a, b) ( (a > b) ? 
a : b ) diff --git a/parser.c b/parser.c index 2de949d..72d528c 100644 --- a/parser.c +++ b/parser.c @@ -1,19 +1,14 @@ #include "parser.h" -#include "lexer.h" #include "assert.h" - -struct token_reader { - bool have_token; - struct token token; - struct lexer_state lexer_state; -}; +#include "printf.h" +#include "malloc.h" struct token peek(struct token_reader * reader) { if (reader->have_token) { return reader->token; } else { - reader->token = lexer_next_token(reader->lexer_state); + reader->token = lexer_next_token(&reader->lexer_state); reader->have_token = true; return reader->token; } @@ -23,13 +18,73 @@ struct token consume(struct token_reader * reader) { struct token token = peek(reader); reader->have_token = false; + return token; } -void expect_type(struct token_reader * reader, enum token_type token_type) +bool match_type(struct token_reader * reader, enum token_type token_type) { - struct token token = consume(reader); - if (!(token->type == token_type)) { - printf("token->type=%d token_type=%d\n", token->type, token_type); - fail(token->type == token_type); + struct token token = peek(reader); + if (token.type == token_type) { + consume(reader); + return true; + } else { + return false; } } + +struct token expect_type(struct token_reader * reader, enum token_type token_type) +{ + struct token token = consume(reader); + if (!(token.type == token_type)) { + printf("token.type=%d token_type=%d\n", token.type, token_type); + fail(token.type == token_type); + } + return token; +} + +struct expression * parse_expression(struct token_reader * reader) +{ + struct expression * expr = malloct(struct expression); + struct token token = expect_type(reader, TOKEN_CONSTANT); + expr->constant = token; + return expr; +} + +struct statement_return * parse_statement_return(struct token_reader * reader) +{ + struct expression * expr = parse_expression(reader); + struct statement_return * stmt = malloct(struct statement_return); + stmt->expression = expr; + return stmt; 
+} + +struct statement_if_else * parse_statement_if_else(struct token_reader * reader) +{ + struct statement_if_else * stmt = malloct(struct statement_if_else); + expect_type(reader, TOKEN_LPAREN); + struct expression * expr = parse_expression(reader); + expect_type(reader, TOKEN_RPAREN); + struct statement * statement_if = parse_statement(reader); + struct statement * statement_else = match_type(reader, TOKEN_ELSE) ? parse_statement(reader) : nullptr; + stmt->expression = expr; + stmt->statement_if = statement_if; + stmt->statement_else = statement_else; + return stmt; +} + +struct statement * parse_statement(struct token_reader * reader) +{ + struct statement * stmt = malloct(struct statement); + if (match_type(reader, TOKEN_IF)) { + stmt->type = STATEMENT_IF_ELSE; + stmt->statement_if_else = parse_statement_if_else(reader); + } else if (match_type(reader, TOKEN_RETURN)) { + stmt->type = STATEMENT_RETURN; + stmt->statement_return = parse_statement_return(reader); + } else { + printf("token_type=%d\n", peek(reader).type); + fail("expected statement"); + } + + return stmt; +} diff --git a/parser.h b/parser.h new file mode 100644 index 0000000..66df2e4 --- /dev/null +++ b/parser.h @@ -0,0 +1,12 @@ +#pragma once + +#include "lexer.h" +#include "ast.h" + +struct token_reader { + bool have_token; + struct token token; + struct lexer_state lexer_state; +}; + +struct statement * parse_statement(struct token_reader * reader); diff --git a/print_class b/print_class new file mode 100755 index 0000000..7acecf9 Binary files /dev/null and b/print_class differ diff --git a/printf.c b/printf.c new file mode 100644 index 0000000..f174025 --- /dev/null +++ b/printf.c @@ -0,0 +1,186 @@ +#include <stdint.h> +#include <stdarg.h> + +#include "string_parse.h" +#include "string_unparse.h" +#include "printf.h" +//#include "sh7091_scif.h" + +enum format_type { + FORMAT_BASE10_UNSIGNED, + FORMAT_BASE10, + FORMAT_BASE10_64, + FORMAT_POINTER, + FORMAT_BASE16, + FORMAT_STRING, + FORMAT_CHAR, + FORMAT_PERCENT, +};
+ +struct format { + enum format_type type; + int pad_length; + char fill_char; +}; + +static const char * parse_escape(const char * format, struct format * ft); + +static const char * parse_fill_pad(const char * format, struct format * ft) +{ + if (*format == 0) + return format; + if (*format >= '1' && *format <= '9') /* a leading 1-9 starts the width, so use the default fill; anything else is consumed as the fill character */ + ft->fill_char = ' '; + else + ft->fill_char = *format++; + format = parse_base10(format, &ft->pad_length); + return parse_escape(format, ft); +} + +static const char * parse_escape(const char * format, struct format * ft) +{ + switch (*format) { + case 0: + return format; + case 'u': + ft->type = FORMAT_BASE10_UNSIGNED; + return format + 1; + case 'd': + ft->type = FORMAT_BASE10; + return format + 1; + case 'l': + ft->type = FORMAT_BASE10_64; + return format + 1; + case 'p': + ft->type = FORMAT_POINTER; + return format + 1; + case 'x': + ft->type = FORMAT_BASE16; + return format + 1; + case 's': + ft->type = FORMAT_STRING; + return format + 1; + case 'c': + ft->type = FORMAT_CHAR; + return format + 1; + case '%': + ft->type = FORMAT_PERCENT; + return format + 1; + default: + return parse_fill_pad(format, ft); + } +} + +void print_string(const char * s, int length) +{ + for (int i = 0; i < length; i++) { + print_char(s[i]); + } +} + +void print_bytes(const uint8_t * s, int length) +{ + for (int i = 0; i < length; i++) { + print_char(s[i]); + } +} + +void print_chars(const uint16_t * s, int length) +{ + for (int i = 0; i < length; i++) { + print_char(s[i]); + } +} + +void print_cstring(const char * s) +{ + while (*s != 0) { + print_char(*s++); + } +} + +void _printf(const char * format, ...) 
+{ + va_list args; + va_start(args, format); + + while (true) { + if (*format == 0) + break; + + switch (*format) { + case '%': + { + struct format ft = {0}; + format = parse_escape(format + 1, &ft); + switch (ft.type) { + case FORMAT_BASE10_UNSIGNED: + { + uint32_t num = va_arg(args, uint32_t); + char s[32]; /* the pad width is clamped to this buffer so a wide pad cannot overrun it */ + int offset = unparse_base10_unsigned(s, num, ft.pad_length < (int)sizeof(s) ? ft.pad_length : (int)sizeof(s), ft.fill_char); + print_string(s, offset); + } + break; + case FORMAT_BASE10: + { + int32_t num = va_arg(args, int32_t); + char s[32]; /* sign + 10 digits need 11 bytes; pad width is clamped to the buffer */ + int offset = unparse_base10(s, num, ft.pad_length < (int)sizeof(s) ? ft.pad_length : (int)sizeof(s), ft.fill_char); + print_string(s, offset); + } + break; + case FORMAT_BASE10_64: + { + int64_t num = va_arg(args, int64_t); + char s[32]; /* sign + 19 digits need 20 bytes; pad width is clamped to the buffer */ + int offset = unparse_base10_64(s, num, ft.pad_length < (int)sizeof(s) ? ft.pad_length : (int)sizeof(s), ft.fill_char); + print_string(s, offset); + } + break; + case FORMAT_POINTER: + { + print_char('0'); + print_char('x'); + } + /* fall through */; + case FORMAT_BASE16: + { + uint32_t num = va_arg(args, uint32_t); + char s[32]; /* 8 hex digits at most; pad width is clamped to the buffer */ + int offset = unparse_base16(s, num, ft.pad_length < (int)sizeof(s) ? ft.pad_length : (int)sizeof(s), ft.fill_char); + print_string(s, offset); + } + break; + case FORMAT_STRING: + { + const char * s = va_arg(args, const char *); + while (*s != 0) { + char c = *s++; + print_char(c); + } + } + break; + case FORMAT_CHAR: + { + const int c = va_arg(args, const int); + print_char((char)c); + } + break; + case FORMAT_PERCENT: + print_char('%'); + break; + } + } + break; + default: + { + char c = *format++; + print_char(c); + } + break; + } + } + + va_end(args); +} diff --git a/printf.h b/printf.h new file mode 100644 index 0000000..1713fba --- /dev/null +++ b/printf.h @@ -0,0 +1,47 @@ +#pragma once + +#if defined(__dreamcast__) +#include "sh7091_scif.h" +#else +#include <stdio.h> +#endif + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void print_char(char c) +{ +#if defined(__dreamcast__) + // scif_character(c); +#else + fputc(c, stderr); +#endif +} + +void print_string(const char * s, int length); +void print_bytes(const uint8_t * s, int 
length); +void print_chars(const uint16_t * s, int length); +void print_cstring(const char * s); + +void _printf(const char * format, ...); + +#define printf(...) _printf(__VA_ARGS__) +#define printc(c) print_char(c) +#define prints(s) print_cstring(s) + +#if defined(DEBUG_PRINT) +#define debugf(...) _printf(__VA_ARGS__) +#define debugc(c) print_char(c) +#define debugs(s) print_cstring(s) +#else +#define debugf(...) +#define debugc(c) +#define debugs(c) +#endif + +#ifdef __cplusplus +} +#endif diff --git a/string_parse.c b/string_parse.c new file mode 100644 index 0000000..05cb963 --- /dev/null +++ b/string_parse.c @@ -0,0 +1,70 @@ +#include <stdint.h> + +#include "string_parse.h" + +int parse_base10_digit(char c) +{ + switch (c) { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + default: return -1; + } +} + +const char * parse_base10(const char * s, int * n) +{ + *n = 0; + int sign = 1; + + if (*s == '-') { + sign = -1; + s++; + } + + while (true) { + int digit = parse_base10_digit(*s); + if (digit == -1) + break; + + *n *= 10; + *n += digit; + s++; + } + + *n *= sign; + + return s; +} + +const char * parse_base10_64(const char * s, int64_t * n) +{ + *n = 0; + int sign = 1; + + if (*s == '-') { + sign = -1; + s++; + } + + while (true) { + int digit = parse_base10_digit(*s); + if (digit == -1) + break; + + *n *= 10; + *n += digit; + s++; + } + + *n *= sign; + + return s; +} diff --git a/string_parse.h b/string_parse.h new file mode 100644 index 0000000..3e7f4e4 --- /dev/null +++ b/string_parse.h @@ -0,0 +1,21 @@ +#pragma once + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +const char * parse_skip(const char * s, char c); +const char * parse_find(const char * s, char c); +const char * parse_find_first_right(const char * s, int length, char c); +int parse_base10_digit(char c); +const char * 
parse_base10(const char * s, int * n); +const char * parse_base10_64(const char * s, int64_t * n); +const char * parse_match(const char * s, const char * m); +int parse_stride(const char * s, int length); +int parse_height(const char * s, int length); + +#ifdef __cplusplus +} +#endif diff --git a/string_unparse.c b/string_unparse.c new file mode 100644 index 0000000..45a78fa --- /dev/null +++ b/string_unparse.c @@ -0,0 +1,194 @@ +#include <stdint.h> + +#include "minmax.h" +#include "string_unparse.h" + +int digits_base10(uint32_t n) +{ + if (n >= 1000000000ul) return 10; + if (n >= 100000000ul) return 9; + if (n >= 10000000ul) return 8; + if (n >= 1000000ul) return 7; + if (n >= 100000ul) return 6; + if (n >= 10000ul) return 5; + if (n >= 1000ul) return 4; + if (n >= 100ul) return 3; + if (n >= 10ul) return 2; + return 1; +} + +int unparse_base10_unsigned(char * s, uint32_t n, int len, char fill) +{ + int digits = 0; + digits += digits_base10(n); + len = max(digits, len); + int ret = len; + + while (len > digits) { + *s++ = fill; + --len; + } + + while (len > 0) { + const uint32_t digit = n % 10; + n = n / 10; + s[--len] = digit + 48; + } + + return ret; +} + +int unparse_base10(char * s, int32_t n, int len, char fill) +{ + bool negative = false; + int digits = 0; + uint32_t mag = (uint32_t)n; /* unsigned magnitude: negating INT32_MIN as int32_t would overflow */ + if (n < 0) { + digits += 1; + mag = 0u - mag; + negative = true; + } + digits += digits_base10(mag); + len = max(digits, len); + int ret = len; + + while (len > digits) { + *s++ = fill; + --len; + } + + if (negative) { + *s++ = '-'; + len--; + } + + while (len > 0) { + const uint32_t digit = mag % 10; + mag = mag / 10; + s[--len] = digit + 48; + } + + return ret; +} + +int digits_base10_64(uint64_t n) +{ + if (n >= 10000000000000000000ull) return 20; + if (n >= 1000000000000000000ull) return 19; + if (n >= 100000000000000000ull) return 18; + if (n >= 10000000000000000ull) return 17; + if (n >= 1000000000000000ull) return 16; + if (n >= 100000000000000ull) return 15; + if (n >= 10000000000000ull) return 14; + if (n >= 
1000000000000ull) return 13; + if (n >= 100000000000ull) return 12; + if (n >= 10000000000ull) return 11; + if (n >= 1000000000ull) return 10; + if (n >= 100000000ull) return 9; + if (n >= 10000000ull) return 8; + if (n >= 1000000ull) return 7; + if (n >= 100000ull) return 6; + if (n >= 10000ull) return 5; + if (n >= 1000ull) return 4; + if (n >= 100ull) return 3; + if (n >= 10ull) return 2; + return 1; +} + +int unparse_base10_64(char * s, int64_t n, int len, char fill) +{ + bool negative = false; + int digits = 0; + uint64_t mag = (uint64_t)n; /* unsigned magnitude: negating INT64_MIN as int64_t would overflow */ + if (n < 0) { + digits += 1; + mag = 0ull - mag; + negative = true; + } + digits += digits_base10_64(mag); + len = max(digits, len); + int ret = len; + + while (len > digits) { + *s++ = fill; + --len; + } + + if (negative) { + *s++ = '-'; + len--; + } + + while (len > 0) { + const uint32_t digit = mag % 10; + mag = mag / 10; + s[--len] = digit + 48; + } + + return ret; +} + +static int digits_base16(uint32_t n) +{ + if (n <= 0xf) return 1; + if (n <= 0xff) return 2; + if (n <= 0xfff) return 3; + if (n <= 0xffff) return 4; + if (n <= 0xfffff) return 5; + if (n <= 0xffffff) return 6; + if (n <= 0xfffffff) return 7; + return 8; +} + +int unparse_base16(char * s, uint32_t n, int len, char fill) +{ + int digits = digits_base16(n); + len = max(digits, len); + int ret = len; + + while (len > digits) { + *s++ = fill; + --len; + } + + while (len > 0) { + uint32_t nib = n & 0xf; + n = n >> 4; + if (nib > 9) { + nib += (97 - 10); + } else { + nib += (48 - 0); + } + + s[--len] = nib; + } + + return ret; +} + +#ifdef UNPARSE_TEST +#include <stdio.h> + +int main() +{ + char s[1024]; + + { + int n = 124; + + int offset = unparse_base10(s, n, 6, ' '); + s[offset] = 0; + + printf("`%s`\n", s); + } + + { + int n = 0x5678; + + int offset = unparse_base16(s, n, 7, '0'); + s[offset] = 0; + + printf("`%s`\n", s); + } +} +#endif diff --git a/string_unparse.h b/string_unparse.h new file mode 100644 index 0000000..ba8ea99 --- /dev/null +++ b/string_unparse.h @@ -0,0 +1,19 @@ +#pragma once + +#include <stdint.h> + 
+#ifdef __cplusplus +extern "C" { +#endif + +int unparse_base10_unsigned(char * s, uint32_t n, int len, char fill); +int unparse_base10(char * s, int32_t n, int len, char fill); +int unparse_base10_64(char * s, int64_t n, int len, char fill); +int unparse_base16(char * s, uint32_t n, int len, char fill); + +int digits_base10(uint32_t n); /* matches the non-static definition in string_unparse.c */ +int digits_base10_64(uint64_t n); + +#ifdef __cplusplus +} +#endif