parser: initial statement/expression

This commit is contained in:
Zack Buhman 2025-02-24 17:41:06 -06:00
parent c8321747f6
commit 19d79a7914
18 changed files with 697 additions and 23 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*.o
*.d

43
Makefile Normal file
View File

@ -0,0 +1,43 @@
# Convert a spreadsheet (.ods) to CSV by running LibreOffice headless; the
# output is written next to the target ($(dir $@)).
%.csv: %.ods
libreoffice --headless --convert-to csv:"Text - txt - csv (StarCalc)":44,34,76,,,,true --outdir $(dir $@) $<
# compiler.mk supplies the OBJ list used by the link rules below.
include compiler.mk
# Directory containing this Makefile, without a trailing slash; used for -I paths.
MAKEFILE_PATH := $(patsubst %/,%,$(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
CC ?= gcc
ARCH = -m32
CFLAGS += -Wall -Werror -Wfatal-errors -Wno-error=unused-variable -fstack-protector -std=c2x -g
CFLAGS += -I$(MAKEFILE_PATH)/
CFLAGS += -I$(MAKEFILE_PATH)/c
CFLAGS += -DDEBUG
#CFLAGS += -DDEBUG_PRINT
LDFLAGS = -lm
OPT ?= -O0
DEPFLAGS = -MMD -MP
# Compile one C source. -MF ${<}.d names the dependency file after the source
# (foo.c -> foo.c.d).
# NOTE(review): no include of the generated .d files is visible in this
# Makefile, so header changes may not trigger rebuilds -- confirm.
%.o: %.c
$(CC) $(ARCH) $(CFLAGS) $(OPT) $(DEPFLAGS) -MF ${<}.d -c $< -o $@
# NOTE(review): PRINT_CLASS_OBJ and MAIN_HOSTED_OBJ are not defined in this
# Makefile or in the compiler.mk shown with it -- confirm where they come from.
# NOTE(review): $(LDFLAGS) (-lm) is placed before the objects ($^) on the link
# line; libraries are conventionally listed after the objects -- confirm this
# links as intended.
print_class: $(OBJ) $(PRINT_CLASS_OBJ)
$(CC) $(ARCH) $(LDFLAGS) $^ -o $@
main: $(OBJ) $(MAIN_HOSTED_OBJ)
$(CC) $(ARCH) $(LDFLAGS) $^ -o $@
# Remove the linked programs and images, then every .o/.d/.gch file found by
# `find` (no start directory is given on the command line).
clean:
rm -f main print_class *.elf *.bin
find -P \
-regextype posix-egrep \
-regex '.*\.(o|d|gch)$$' \
-exec rm {} \;
# Empty .SUFFIXES clears the suffix list (disables old-style suffix rules).
.SUFFIXES:
.INTERMEDIATE:
.SECONDARY:
.PHONY: all clean phony
# Pattern rules with no recipe: in GNU make these cancel the built-in
# RCS/SCCS checkout rules.
%: RCS/%,v
%: RCS/%
%: %,v
%: s.%
%: SCCS/s.%

View File

@ -13,4 +13,4 @@ extern void __assert_fail (const char *__assertion, const char *__file,
})
#define fail(expr) \
(__assert_fail (#expr, __FILE__, __LINE__, __ASSERT_FUNCTION);)
(__assert_fail (#expr, __FILE__, __LINE__, __ASSERT_FUNCTION))

12
ast.h
View File

@ -3,12 +3,11 @@
struct token;
struct expression {
struct token * constant;
struct token constant;
};
enum statement_type {
STATEMENT_RETURN,
STATEMENT_IF,
STATEMENT_IF_ELSE,
};
@ -18,21 +17,16 @@ struct statement_return {
struct expression * expression;
};
struct statement_if {
struct expression * expression;
struct statement * statement;
};
struct statement_if_else {
struct expression * expression;
struct statement * statement;
struct statement * statement_if;
struct statement * statement_else;
};
struct statement {
enum statement_type type;
union {
struct statement_return * statement_return;
struct statement_if * statement_if;
struct statement_if_else * statement_if_else;
};
};

8
compiler.mk Normal file
View File

@ -0,0 +1,8 @@
# Object files for the compiler sources; the Makefile lists $(OBJ) as a
# prerequisite of both `main` and `print_class`.
# NOTE(review): main_hosted.o is part of OBJ, while the Makefile also
# references a separate $(MAIN_HOSTED_OBJ) -- confirm which is intended.
OBJ = \
lexer.o \
parser.o \
malloc.o \
printf.o \
string_parse.o \
string_unparse.o \
main_hosted.o

10
lexer.c
View File

@ -40,6 +40,16 @@ static const struct keyword_desc keywords[] = {
.length = 6,
.token_type = TOKEN_RETURN,
},
// `if` keyword. Must map to TOKEN_IF: parse_statement dispatches on that
// token type, and mapping it to TOKEN_RETURN made `if` parse as `return`.
{
  .buf = (const uint8_t *)"if",
  .length = 2,
  .token_type = TOKEN_IF,
},
// `else` keyword. Must map to TOKEN_ELSE so the optional else branch of an
// if statement can be matched.
{
  .buf = (const uint8_t *)"else",
  .length = 4,
  .token_type = TOKEN_ELSE,
},
};
static inline bool keyword_equal(const uint8_t * buf, int start, int end, const struct keyword_desc * keyword)

View File

@ -10,6 +10,8 @@ enum token_type {
TOKEN_INT,
TOKEN_VOID,
TOKEN_RETURN,
TOKEN_IF,
TOKEN_ELSE,
TOKEN_LPAREN,
TOKEN_RPAREN,
TOKEN_LBRACE,

7
malloc.h Normal file
View File

@ -0,0 +1,7 @@
#pragma once
#include <stdint.h>
// Request storage sized for one object of type `t` from malloc_class_arena.
// The expression has type `void *`; no cast is applied.
#define malloct(t) (malloc_class_arena((sizeof (t))))
// Returns a pointer for a request of `size` bytes. The definition is not part
// of this header.
// NOTE(review): failure behaviour (e.g. whether it can return a null pointer)
// is not visible here -- callers in parser.c use the result unchecked.
void * malloc_class_arena(uint32_t size);

4
minmax.h Normal file
View File

@ -0,0 +1,4 @@
#pragma once
// Smaller / larger of two expressions.
// Every use of an argument is parenthesized so that operands containing
// lower-precedence operators (e.g. `x & 1`, `c ? a : b`) compare as a whole.
// Each argument may still be evaluated twice: do not pass expressions with
// side effects.
#define min(a, b) ( ((a) < (b)) ? (a) : (b) )
#define max(a, b) ( ((a) > (b)) ? (a) : (b) )

View File

@ -1,19 +1,14 @@
#include "parser.h"
#include "lexer.h"
#include "assert.h"
struct token_reader {
bool have_token;
struct token token;
struct lexer_state lexer_state;
};
#include "printf.h"
#include "malloc.h"
struct token peek(struct token_reader * reader)
{
if (reader->have_token) {
return reader->token;
} else {
reader->token = lexer_next_token(reader->lexer_state);
reader->token = lexer_next_token(&reader->lexer_state);
reader->have_token = true;
return reader->token;
}
@ -23,13 +18,73 @@ struct token consume(struct token_reader * reader)
{
struct token token = peek(reader);
reader->have_token = false;
return token;
}
void expect_type(struct token_reader * reader, enum token_type token_type)
bool match_type(struct token_reader * reader, enum token_type token_type)
{
struct token token = consume(reader);
if (!(token->type == token_type)) {
printf("token->type=%d token_type=%d\n", token->type, token_type);
fail(token->type == token_type);
struct token token = peek(reader);
if (token.type == token_type) {
consume(reader);
return true;
} else {
return false;
}
}
// Consume the next token and require it to have type `token_type`.
// On a mismatch both type values are printed and fail() is invoked.
// Returns the consumed token.
struct token expect_type(struct token_reader * reader, enum token_type token_type)
{
  struct token token = consume(reader);
  if (token.type == token_type) {
    return token;
  }

  printf("token.type=%d token_type=%d\n", token.type, token_type);
  fail(token.type == token_type);
  return token;
}
// Parse an expression, which at this stage is a single TOKEN_CONSTANT.
// Returns a newly allocated expression holding a copy of that token.
struct expression * parse_expression(struct token_reader * reader)
{
  struct expression * node = malloct(struct expression);
  node->constant = expect_type(reader, TOKEN_CONSTANT);
  return node;
}
// Parse the remainder of a return statement: the expression that follows the
// `return` keyword (the keyword itself was consumed by the caller).
// Returns a newly allocated statement_return pointing at that expression.
struct statement_return * parse_statement_return(struct token_reader * reader)
{
  struct expression * value = parse_expression(reader);

  struct statement_return * node = malloct(struct statement_return);
  node->expression = value;
  return node;
}
// Parse the remainder of an if statement after the `if` keyword:
//   '(' expression ')' statement [ 'else' statement ]
// statement_else is nullptr when no `else` follows.
struct statement_if_else * parse_statement_if_else(struct token_reader * reader)
{
  struct statement_if_else * node = malloct(struct statement_if_else);

  expect_type(reader, TOKEN_LPAREN);
  struct expression * condition = parse_expression(reader);
  expect_type(reader, TOKEN_RPAREN);

  struct statement * then_branch = parse_statement(reader);
  struct statement * else_branch = nullptr;
  if (match_type(reader, TOKEN_ELSE)) {
    else_branch = parse_statement(reader);
  }

  node->expression = condition;
  node->statement_if = then_branch;
  node->statement_else = else_branch;
  return node;
}
// Parse one statement, dispatching on its leading keyword (`if` or `return`).
// Any other token prints its type and invokes fail().
// Returns a newly allocated statement.
struct statement * parse_statement(struct token_reader * reader)
{
  struct statement * node = malloct(struct statement);

  if (match_type(reader, TOKEN_IF)) {
    node->type = STATEMENT_IF_ELSE;
    node->statement_if_else = parse_statement_if_else(reader);
    return node;
  }

  if (match_type(reader, TOKEN_RETURN)) {
    node->type = STATEMENT_RETURN;
    node->statement_return = parse_statement_return(reader);
    return node;
  }

  printf("token_type=%d\n", peek(reader).type);
  fail("expected statement");
  return node;
}

12
parser.h Normal file
View File

@ -0,0 +1,12 @@
#pragma once
#include "lexer.h"
#include "ast.h"
// Token source with a single buffered token, used by peek()/consume() in
// parser.c.
struct token_reader {
// true while `token` holds a token that peek() fetched; consume() clears it
bool have_token;
// the buffered token; only meaningful while have_token is true
struct token token;
// lexer state; peek() passes its address to lexer_next_token()
struct lexer_state lexer_state;
};
// Parse one statement (currently `if` or `return`) from `reader` and return
// the allocated statement node.
struct statement * parse_statement(struct token_reader * reader);

BIN
print_class Executable file

Binary file not shown.

186
printf.c Normal file
View File

@ -0,0 +1,186 @@
#include <stdint.h>
#include <stdarg.h>
#include "string_parse.h"
#include "string_unparse.h"
#include "printf.h"
//#include "sh7091_scif.h"
// Conversion selected by the character that ends a `%` escape (see
// parse_escape). FORMAT_BASE10_UNSIGNED has value 0, so it is also what a
// zero-initialized struct format holds.
enum format_type {
FORMAT_BASE10_UNSIGNED, // 'u'
FORMAT_BASE10, // 'd'
FORMAT_BASE10_64, // 'l'
FORMAT_POINTER, // 'p'
FORMAT_BASE16, // 'x'
FORMAT_STRING, // 's'
FORMAT_CHAR, // 'c'
FORMAT_PERCENT, // '%'
};
// Result of parsing one `%` escape.
struct format {
// which conversion to perform
enum format_type type;
// requested width, as parsed by parse_base10; passed to the unparse_* functions as `len`
int pad_length;
// character passed to the unparse_* functions as `fill`
char fill_char;
};
static const char * parse_escape(const char * format, struct format * ft);
// Parse the optional fill character and pad width that precede a conversion
// character, then hand the remainder to parse_escape.
//
// A leading '1'..'9' starts the width directly and the fill defaults to a
// space. Any other character is taken as the fill character and skipped, so
// "%08x" means: fill with '0' to width 8.
//
// Returns the position just past the conversion, or `format` unchanged when
// it already points at the terminating NUL.
static const char * parse_fill_pad(const char * format, struct format * ft)
{
  if (*format == 0)
    return format;
  // This test used `||`, which is true for every character: the fill was
  // always ' ', and a non-digit, non-conversion character was never consumed,
  // so parse_escape and this function called each other without progress.
  if (*format >= '1' && *format <= '9')
    ft->fill_char = ' ';
  else
    ft->fill_char = *format++;
  format = parse_base10(format, &ft->pad_length);
  return parse_escape(format, ft);
}
static const char * parse_escape(const char * format, struct format * ft)
{
switch (*format) {
case 0:
return format;
case 'u':
ft->type = FORMAT_BASE10_UNSIGNED;
return format + 1;
case 'd':
ft->type = FORMAT_BASE10;
return format + 1;
case 'l':
ft->type = FORMAT_BASE10_64;
return format + 1;
case 'p':
ft->type = FORMAT_POINTER;
return format + 1;
case 'x':
ft->type = FORMAT_BASE16;
return format + 1;
case 's':
ft->type = FORMAT_STRING;
return format + 1;
case 'c':
ft->type = FORMAT_CHAR;
return format + 1;
case '%':
ft->type = FORMAT_PERCENT;
return format + 1;
default:
return parse_fill_pad(format, ft);
}
}
// Print the first `length` chars of `s` via print_char. No terminator is
// required; nothing is printed when `length` is zero or negative.
void print_string(const char * s, int length)
{
  for (int remaining = length; remaining > 0; remaining--) {
    print_char(*s++);
  }
}
// Print the first `length` bytes of `s` via print_char. Nothing is printed
// when `length` is zero or negative.
void print_bytes(const uint8_t * s, int length)
{
  for (int remaining = length; remaining > 0; remaining--) {
    print_char(*s++);
  }
}
// Print the first `length` 16-bit units of `s` via print_char. Each unit is
// converted to `char` by the call, so only values that fit in a char are
// printed faithfully.
void print_chars(const uint16_t * s, int length)
{
  for (int remaining = length; remaining > 0; remaining--) {
    print_char(*s++);
  }
}
// Print a NUL-terminated string via print_char; the terminator is not printed.
void print_cstring(const char * s)
{
  for (; *s != 0; s++) {
    print_char(*s);
  }
}
void _printf(const char * format, ...)
{
va_list args;
va_start(args, format);
while (true) {
if (*format == 0)
break;
switch (*format) {
case '%':
{
struct format ft = {0};
format = parse_escape(format + 1, &ft);
switch (ft.type) {
case FORMAT_BASE10_UNSIGNED:
{
uint32_t num = va_arg(args, uint32_t);
char s[10];
int offset = unparse_base10_unsigned(s, num, ft.pad_length, ft.fill_char);
print_string(s, offset);
}
break;
case FORMAT_BASE10:
{
int32_t num = va_arg(args, int32_t);
char s[10];
int offset = unparse_base10(s, num, ft.pad_length, ft.fill_char);
print_string(s, offset);
}
break;
case FORMAT_BASE10_64:
{
int64_t num = va_arg(args, int64_t);
char s[20];
int offset = unparse_base10_64(s, num, ft.pad_length, ft.fill_char);
print_string(s, offset);
}
break;
case FORMAT_POINTER:
{
print_char('0');
print_char('x');
}
/* fall through */;
case FORMAT_BASE16:
{
uint32_t num = va_arg(args, uint32_t);
char s[8];
int offset = unparse_base16(s, num, ft.pad_length, ft.fill_char);
print_string(s, offset);
}
break;
case FORMAT_STRING:
{
const char * s = va_arg(args, const char *);
while (*s != 0) {
char c = *s++;
print_char(c);
}
}
break;
case FORMAT_CHAR:
{
const int c = va_arg(args, const int);
print_char((char)c);
}
break;
case FORMAT_PERCENT:
print_char('%');
break;
}
}
break;
default:
{
char c = *format++;
print_char(c);
}
break;
}
}
va_end(args);
}

47
printf.h Normal file
View File

@ -0,0 +1,47 @@
#pragma once
#if defined(__dreamcast__)
#include "sh7091_scif.h"
#else
#include <stdio.h>
#endif
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Output a single character. On hosted builds it is written to stderr with
// fputc; when __dreamcast__ is defined the body is currently empty because
// the scif_character call is commented out.
static inline void print_char(char c)
{
#if defined(__dreamcast__)
// scif_character(c);
#else
fputc(c, stderr);
#endif
}
// Print exactly `length` elements of `s`, one print_char call per element.
void print_string(const char * s, int length);
void print_bytes(const uint8_t * s, int length);
void print_chars(const uint16_t * s, int length);
// Print a NUL-terminated string.
void print_cstring(const char * s);
// Formatted output through print_char; see printf.c for the supported escapes.
void _printf(const char * format, ...);
// Route printf() calls in files that include this header to _printf.
// NOTE(review): on hosted builds this header also includes <stdio.h>, so the
// macro replaces the standard printf for every includer -- confirm intended.
#define printf(...) _printf(__VA_ARGS__)
#define printc(c) print_char(c)
#define prints(s) print_cstring(s)
// debugf/debugc/debugs forward to the functions above when DEBUG_PRINT is
// defined, and expand to nothing otherwise (arguments are then not evaluated).
#if defined(DEBUG_PRINT)
#define debugf(...) _printf(__VA_ARGS__)
#define debugc(c) print_char(c)
#define debugs(s) print_cstring(s)
#else
#define debugf(...)
#define debugc(c)
#define debugs(c)
#endif
#ifdef __cplusplus
}
#endif

70
string_parse.c Normal file
View File

@ -0,0 +1,70 @@
#include <stddef.h>
#include "string_parse.h"
// Value of the decimal digit character `c` (0..9), or -1 when `c` is not a
// decimal digit.
int parse_base10_digit(char c)
{
  // '0'..'9' are guaranteed to be contiguous in the execution character set.
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  return -1;
}
const char * parse_base10(const char * s, int * n)
{
*n = 0;
int sign = 1;
if (*s == '-') {
sign = -1;
s++;
}
while (true) {
int digit = parse_base10_digit(*s);
if (digit == -1)
break;
*n *= 10;
*n += digit;
s++;
}
*n *= sign;
return s;
}
const char * parse_base10_64(const char * s, int64_t * n)
{
*n = 0;
int sign = 1;
if (*s == '-') {
sign = -1;
s++;
}
while (true) {
int digit = parse_base10_digit(*s);
if (digit == -1)
break;
*n *= 10;
*n += digit;
s++;
}
*n *= sign;
return s;
}

21
string_parse.h Normal file
View File

@ -0,0 +1,21 @@
#pragma once
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// NOTE(review): parse_skip, parse_find and parse_find_first_right have no
// definition in the string_parse.c added with this header -- confirm they are
// implemented elsewhere.
const char * parse_skip(const char * s, char c);
const char * parse_find(const char * s, char c);
const char * parse_find_first_right(const char * s, int length, char c);
// Value of a decimal digit character, or -1 when `c` is not '0'..'9'.
int parse_base10_digit(char c);
// Parse an optionally '-'-prefixed decimal number at `s` into `*n`; returns
// the position after the last digit consumed.
const char * parse_base10(const char * s, int * n);
const char * parse_base10_64(const char * s, int64_t * n);
// NOTE(review): parse_match, parse_stride and parse_height likewise have no
// visible definition -- confirm.
const char * parse_match(const char * s, const char * m);
int parse_stride(const char * s, int length);
int parse_height(const char * s, int length);
#ifdef __cplusplus
}
#endif

194
string_unparse.c Normal file
View File

@ -0,0 +1,194 @@
#include <stdint.h>
#include "minmax.h"
#include "string_unparse.h"
// Number of decimal digits needed to print `n` (1 for zero, up to 10).
int digits_base10(uint32_t n)
{
  int count = 1;
  while (n >= 10) {
    n /= 10;
    count++;
  }
  return count;
}
// Write `n` in decimal to `s`, left-padded with `fill` to at least `len`
// characters. No terminator is written. Returns the number of characters
// written, i.e. the larger of `len` and the digit count.
int unparse_base10_unsigned(char * s, uint32_t n, int len, char fill)
{
  const int digits = digits_base10(n);
  const int total = max(digits, len);

  char * out = s;
  for (int pad = total - digits; pad > 0; pad--) {
    *out++ = fill;
  }

  // Digits are produced least-significant first, so fill from the right.
  for (int i = digits - 1; i >= 0; i--) {
    out[i] = (n % 10) + 48;
    n /= 10;
  }
  return total;
}
// Write the signed value `n` in decimal to `s`, left-padded with `fill` to at
// least `len` characters; a '-' sign counts towards the width and is placed
// after the padding. No terminator is written. Returns the number of
// characters written (at most 11 when `len` does not exceed that).
int unparse_base10(char * s, int32_t n, int len, char fill)
{
  const bool negative = n < 0;
  // Take the magnitude in unsigned arithmetic. `-n` on the signed value
  // overflows for INT32_MIN, and the digit loop then computed `n % 10` on a
  // negative number, producing garbage characters.
  uint32_t magnitude = negative ? 0u - (uint32_t)n : (uint32_t)n;

  int digits = digits_base10(magnitude);
  if (negative) {
    digits += 1; // room for the sign
  }

  len = max(digits, len);
  int ret = len;

  while (len > digits) {
    *s++ = fill;
    --len;
  }

  if (negative) {
    *s++ = '-';
    len--;
  }

  // Digits are produced least-significant first, so fill from the right.
  while (len > 0) {
    const uint32_t digit = magnitude % 10;
    magnitude = magnitude / 10;
    s[--len] = digit + 48;
  }
  return ret;
}
// Number of decimal digits needed to print the 64-bit value `n` (1 for zero,
// up to 20).
int digits_base10_64(uint64_t n)
{
  int count = 1;
  while (n >= 10) {
    n /= 10;
    count++;
  }
  return count;
}
// 64-bit variant of unparse_base10: write the signed value `n` in decimal to
// `s`, left-padded with `fill` to at least `len` characters; a '-' sign
// counts towards the width and is placed after the padding. No terminator is
// written. Returns the number of characters written.
int unparse_base10_64(char * s, int64_t n, int len, char fill)
{
  const bool negative = n < 0;
  // Take the magnitude in unsigned arithmetic. `-n` on the signed value
  // overflows for INT64_MIN, and the digit loop then computed `n % 10` on a
  // negative number, producing garbage characters.
  uint64_t magnitude = negative ? 0ull - (uint64_t)n : (uint64_t)n;

  int digits = digits_base10_64(magnitude);
  if (negative) {
    digits += 1; // room for the sign
  }

  len = max(digits, len);
  int ret = len;

  while (len > digits) {
    *s++ = fill;
    --len;
  }

  if (negative) {
    *s++ = '-';
    len--;
  }

  // Digits are produced least-significant first, so fill from the right.
  while (len > 0) {
    const uint32_t digit = magnitude % 10;
    magnitude = magnitude / 10;
    s[--len] = digit + 48;
  }
  return ret;
}
// Number of hexadecimal digits needed to print `n` (1 for zero, up to 8).
static int digits_base16(uint32_t n)
{
  int count = 1;
  while (n > 0xf) {
    n >>= 4;
    count++;
  }
  return count;
}
// Write `n` in lowercase hexadecimal to `s`, left-padded with `fill` to at
// least `len` characters. No terminator is written. Returns the number of
// characters written, i.e. the larger of `len` and the digit count.
int unparse_base16(char * s, uint32_t n, int len, char fill)
{
  const int digits = digits_base16(n);
  const int total = max(digits, len);

  char * out = s;
  for (int pad = total - digits; pad > 0; pad--) {
    *out++ = fill;
  }

  // Nibbles are produced least-significant first, so fill from the right.
  for (int i = digits - 1; i >= 0; i--) {
    const uint32_t nib = n & 0xf;
    out[i] = (nib > 9) ? (nib - 10 + 97) : (nib + 48);
    n >>= 4;
  }
  return total;
}
#ifdef UNPARSE_TEST
#include <stdio.h>
// Manual smoke test (built only with -DUNPARSE_TEST): prints 124 padded with
// spaces to width 6, then 0x5678 padded with '0' to width 7, each wrapped in
// backticks.
int main()
{
  char buf[1024];
  int end;

  end = unparse_base10(buf, 124, 6, ' ');
  buf[end] = 0;
  printf("`%s`\n", buf);

  end = unparse_base16(buf, 0x5678, 7, '0');
  buf[end] = 0;
  printf("`%s`\n", buf);
}
#endif

19
string_unparse.h Normal file
View File

@ -0,0 +1,19 @@
#pragma once
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Each unparse_* function writes the textual form of `n` into `s`,
// left-padded with `fill` to at least `len` characters, and returns the
// number of characters written. No terminator is written; the caller supplies
// a buffer large enough for max(len, digits).
int unparse_base10_unsigned(char * s, uint32_t n, int len, char fill);
int unparse_base10(char * s, int32_t n, int len, char fill);
int unparse_base10_64(char * s, int64_t n, int len, char fill);
int unparse_base16(char * s, uint32_t n, int len, char fill);
// NOTE(review): string_unparse.c defines digits_base10 and digits_base10_64
// but nothing named digits_base_64 -- this looks like a leftover or a typo,
// and digits_base10 itself has no prototype here; confirm.
int digits_base_64(uint64_t n);
// Number of decimal digits needed to print `n`.
int digits_base10_64(uint64_t n);
#ifdef __cplusplus
}
#endif