diff options
Diffstat (limited to 'c/lex.c')
-rw-r--r-- | c/lex.c | 82 |
1 files changed, 64 insertions, 18 deletions
@@ -1,4 +1,6 @@ +#include <stdlib.h> #include <stddef.h> +#include <stdbool.h> #include <stdint.h> #include <string.h> #include <ctype.h> @@ -15,6 +17,8 @@ lexgetc(struct lexstate *lexer) lexer->column++; return lexer->buf[lexer->pos++]; } + /* TODO we could check if the last character is 0 and handle + * it here... */ return UINT32_MAX; } @@ -57,12 +61,15 @@ lexsingle(struct lexstate *lexer) skipwhitespace(lexer); + ret.token.loc.lineno = lexer->lineno; + ret.token.loc.column = lexer->column-1; + d = lexgetc(lexer); - if (d == UINT32_MAX) + if (d == UINT32_MAX) { + ret.res = Lmore; return ret; + } c = (u8)d; - ret.token.loc.lineno = lexer->lineno; - ret.token.loc.column = lexer->column; ret.res = Lok; switch (c) { case ';': @@ -103,27 +110,58 @@ lexsingle(struct lexstate *lexer) } usize len = lexer->pos-start; memcpy(str, &lexer->buf[start], len); - printf("token range from %zu to %zu\n", start, lexer->pos); - printf("token value = %s\n", str); enum tokentype t = findkeyword((char *)str); if (t == Txxx) { t = Tident; - printf("identifier! %s\n", str); - } else { - printf("keyword! %s\n", tokname(t)); + /* TODO introduce own string type? smallstr */ + ret.token.str = strdup((char *)str); } ret.token.type = t; return ret; } + /* An unexpected character... */ + ret.res = Lerror; return ret; } +static void +printfileloc(char *filename, struct location loc) +{ + printf("%s:%zu:%zu: ", filename, loc.lineno, loc.column); +} + +static void +printtoken(struct token tok, char *filename) +{ + printfileloc(filename, tok.loc); + if (tok.type < Tident) { + /* keyword */ + printf("%s\n", tokname(tok.type)); + } else if (tok.type == Tident) { + char *s = tok.str ? tok.str : "<meh>"; + printf("ident - %s\n", s); + } else if (tok.type == Tconstant) { + printf("constant...\n"); + } else { + /* syntax/operators */ + printf("%s\n", tokname(tok.type)); + } +} + +void +freetoken(struct token tok) +{ + if (tok.type == Tident) + free(tok.str); +} + struct lexresult lex(struct lexstate *lexer) { struct lexresult ret = {0}; + char filename[] = "../tests/return_2.c"; if (lexer->respos < lexer->nres) { ret = lexer->results[lexer->respos]; @@ -136,16 +174,24 @@ lex(struct lexstate *lexer) } /* lex everything we got in the buffer */ - ret = lexsingle(lexer); - ret = lexsingle(lexer); - /* usize i = 0; */ - /* while (true) { */ - /* ret = lexsingle(lexer); */ - /* if (ret.type == Txxx) */ - /* break; */ - /* lexer->results[i++] = ret; */ - /* } */ - /* something non-whitespace */ + while (true) { + ret = lexsingle(lexer); + switch (ret.res) { + case Lok: + /* TODO assign token to lexer buffer */ + printtoken(ret.token, filename); + freetoken(ret.token); + break; + case Lmore: + printf("no more tokens to lex in current input!\n"); + goto lexout; + case Lerror: + printf("error: unexpected character in input\n"); + goto lexout; + } + } +lexout: + /* TODO return first token from input (if available) */ return ret; } |