#include #include #include #include #include #include #include #include "types.h" #include "token.h" #include "lex.h" /* return the next character available, or an end-of-input indicator */ static u32 lexgetc(struct lexstate *lexer) { if (lexer->pos < lexer->len) { lexer->column++; return lexer->buf[lexer->pos++]; } /* TODO we could check if the last character is 0 and handle * it here... */ return UINT32_MAX; } static void lexungetc(struct lexstate *lexer) { lexer->pos--; } static void skipwhitespace(struct lexstate *lexer) { u32 d; for (d = lexgetc(lexer); d != UINT32_MAX; d = lexgetc(lexer)) { u8 c = (u8)d; switch (c) { case '\n': lexer->lineno++; lexer->column = 0; break; case '\t': case ' ': /* lexgetc() increments column */ break; default: lexungetc(lexer); return; } } } static struct lexresult lexsingle(struct lexstate *lexer) { u32 d; u8 c; struct lexresult ret = {0}; skipwhitespace(lexer); ret.token.loc.lineno = lexer->lineno; ret.token.loc.column = lexer->column-1; d = lexgetc(lexer); if (d == UINT32_MAX) { ret.res = Lmore; return ret; } c = (u8)d; ret.res = Lok; switch (c) { case ';': ret.token.type = Tsemicolon; return ret; case '(': ret.token.type = Tlparen; return ret; case ')': ret.token.type = Trparen; return ret; case '{': ret.token.type = Tlbrace; return ret; case '}': ret.token.type = Trbrace; return ret; } if (isdigit(c)) { /* parse an integer */ /* lexnumber() */ ret.token.type = Tconstant; return ret; } if (isalpha(c) || c == '_') { /* a "save excursion" approach - save the current * lexer->pos and keep incrementing until all * alpha/digit/_ characters are consumed, then grab * the string from the buffer */ u8 str[4096] = {0}; usize start = lexer->pos-1; while ((d = lexgetc(lexer)) != UINT32_MAX) { c = (u8)d; if (!(isalnum(c) || c == '_')) { lexungetc(lexer); break; } } usize len = lexer->pos-start; memcpy(str, &lexer->buf[start], len); enum tokentype t = findkeyword((char *)str); if (t == Txxx) { t = Tident; /* TODO introduce own string type? smallstr */ ret.token.str = strdup((char *)str); } ret.token.type = t; return ret; } /* An unexpected character... */ ret.res = Lerror; return ret; } static void printfileloc(char *filename, struct location loc) { printf("%s:%zu:%zu: ", filename, loc.lineno, loc.column); } static void printtoken(struct token tok, char *filename) { printfileloc(filename, tok.loc); if (tok.type < Tident) { /* keyword */ printf("%s\n", tokname(tok.type)); } else if (tok.type == Tident) { char *s = tok.str ? tok.str : ""; printf("ident - %s\n", s); } else if (tok.type == Tconstant) { printf("constant...\n"); } else { /* syntax/operators */ printf("%s\n", tokname(tok.type)); } } void freetoken(struct token tok) { if (tok.type == Tident) free(tok.str); } struct lexresult lex(struct lexstate *lexer) { struct lexresult ret = {0}; char filename[] = "../tests/return_2.c"; if (lexer->respos < lexer->nres) { ret = lexer->results[lexer->respos]; if (ret.res == Lerror) return ret; if (ret.res == Lok) { lexer->respos++; return ret; } } /* lex everything we got in the buffer */ while (true) { ret = lexsingle(lexer); switch (ret.res) { case Lok: /* TODO assign token to lexer buffer */ printtoken(ret.token, filename); freetoken(ret.token); break; case Lmore: printf("no more tokens to lex in current input!\n"); goto lexout; case Lerror: printf("error: unexpected character in input\n"); goto lexout; } } lexout: /* TODO return first token from input (if available) */ return ret; } void lexfeed(struct lexstate *lexer, usize len, u8 *data) { memcpy(lexer->buf, data, len); lexer->len = len; } void lexreset(struct lexstate *lexer) { memset(lexer, 0, sizeof(*lexer)); lexer->lineno = 1; }