#include "lexer.h"

/* Returns true for the bytes the lexer skips between tokens. */
static inline bool is_whitespace(uint8_t c)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}

/* Returns true if `c` may begin an identifier: an ASCII letter or '_'. */
static inline bool is_identifier_start(uint8_t c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_');
}

/* Returns true if `c` is an ASCII decimal digit. */
static inline bool is_decimal_digit(uint8_t c)
{
    return (c >= '0' && c <= '9');
}

/* One reserved word: its spelling, its length in bytes, and its token type. */
struct keyword_desc {
    const uint8_t *buf;
    int length;
    enum token_type token_type;
};

/*
 * Reserved-word table searched linearly by find_keyword().
 *
 * NOTE(review): "if" and "else" are mapped to TOKEN_RETURN, which looks like a
 * copy-paste slip. The values are left untouched here because the enumerators
 * declared in lexer.h are not visible from this file -- confirm whether
 * dedicated token types exist for them and switch over if so.
 */
static const struct keyword_desc keywords[] = {
    { .buf = (const uint8_t *)"int",    .length = 3, .token_type = TOKEN_INT },
    { .buf = (const uint8_t *)"void",   .length = 4, .token_type = TOKEN_VOID },
    { .buf = (const uint8_t *)"return", .length = 6, .token_type = TOKEN_RETURN },
    { .buf = (const uint8_t *)"if",     .length = 2, .token_type = TOKEN_RETURN },
    { .buf = (const uint8_t *)"else",   .length = 4, .token_type = TOKEN_RETURN },
};

/*
 * Compares the bytes buf[start, end) against `keyword`.
 *
 * Returns true only when the lengths match and every byte is equal.
 */
static inline bool keyword_equal(const uint8_t *buf, int start, int end,
                                 const struct keyword_desc *keyword)
{
    if (end - start != keyword->length) {
        return false;
    }

    for (int i = 0; i < keyword->length; i++) {
        if (buf[start + i] != keyword->buf[i]) {
            return false;
        }
    }
    return true;
}

/*
 * Classifies the lexeme buf[start, end).
 *
 * Returns the token type of the first matching entry in `keywords`, or
 * TOKEN_IDENTIFIER when no entry matches.
 */
static inline enum token_type find_keyword(const uint8_t *buf, int start, int end)
{
    const unsigned int keyword_count = (sizeof (keywords)) / (sizeof (keywords[0]));

    for (unsigned int i = 0; i < keyword_count; i++) {
        if (keyword_equal(buf, start, end, &keywords[i])) {
            return keywords[i].token_type;
        }
    }
    return TOKEN_IDENTIFIER;
}

/*
 * Scans the next token from `state`, advancing state->offset past it.
 *
 * Leading whitespace is skipped. The returned token's `start` and `end`
 * pointers delimit the lexeme inside state->buf (`end` is one past the last
 * byte of the lexeme).
 *
 * Token types produced:
 *   - TOKEN_EOF        when no input remains; the token is empty
 *                      (start == end) and state->offset is not advanced, so
 *                      repeated calls keep returning TOKEN_EOF.
 *   - punctuation      for the single bytes ( ) { } ;
 *   - keyword type or TOKEN_IDENTIFIER for [A-Za-z_][A-Za-z0-9_]*
 *   - TOKEN_CONSTANT   for a run of decimal digits
 *   - TOKEN_INVALID    for a digit run immediately followed by an identifier
 *                      start byte (the token covers only the digits), and for
 *                      any other single byte.
 */
struct token lexer_next_token(struct lexer_state *state)
{
    struct token token;

    while (state->offset < state->size && is_whitespace(state->buf[state->offset])) {
        state->offset += 1;
    }

    int token_start = state->offset;

    /*
     * Check for end of input before touching the buffer: reading
     * buf[offset] here would be out of bounds, and advancing the offset
     * would push it (and token.end) beyond one-past-the-end.
     */
    if (token_start >= state->size) {
        token.type = TOKEN_EOF;
        token.start = &state->buf[token_start];
        token.end = &state->buf[token_start];
        return token;
    }

    uint8_t c = state->buf[state->offset++];

    switch (c) {
    case '(':
        token.type = TOKEN_LPAREN;
        break;
    case ')':
        token.type = TOKEN_RPAREN;
        break;
    case '{':
        token.type = TOKEN_LBRACE;
        break;
    case '}':
        token.type = TOKEN_RBRACE;
        break;
    case ';':
        token.type = TOKEN_SEMICOLON;
        break;
    default:
        if (is_identifier_start(c)) {
            /* Consume the remaining identifier bytes: letters, '_' and digits. */
            while (state->offset < state->size) {
                uint8_t next = state->buf[state->offset];
                if (!(is_identifier_start(next) || is_decimal_digit(next))) {
                    break;
                }
                state->offset += 1;
            }
            token.type = find_keyword(state->buf, token_start, state->offset);
        } else if (is_decimal_digit(c)) {
            /*
             * Default to a constant so that a digit run ending exactly at the
             * end of the buffer still gets a well-defined type.
             */
            token.type = TOKEN_CONSTANT;
            while (state->offset < state->size) {
                uint8_t next = state->buf[state->offset];
                if (!is_decimal_digit(next)) {
                    /* Digits glued to an identifier (e.g. "12ab") are rejected. */
                    if (is_identifier_start(next)) {
                        token.type = TOKEN_INVALID;
                    }
                    break;
                }
                state->offset += 1;
            }
        } else {
            token.type = TOKEN_INVALID;
        }
        break;
    }

    token.start = &state->buf[token_start];
    token.end = &state->buf[state->offset];
    return token;
}