summary | refs | log | tree | commit | diff
path: root/c/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'c/lex.c')
-rw-r--r-- c/lex.c 82
1 files changed, 64 insertions, 18 deletions
diff --git a/c/lex.c b/c/lex.c
index d41562c..c2506a8 100644
--- a/c/lex.c
+++ b/c/lex.c
@@ -1,4 +1,6 @@
+#include <stdlib.h>
#include <stddef.h>
+#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
@@ -15,6 +17,8 @@ lexgetc(struct lexstate *lexer)
lexer->column++;
return lexer->buf[lexer->pos++];
}
+ /* TODO we could check if the last character is 0 and handle
+ * it here... */
return UINT32_MAX;
}
@@ -57,12 +61,15 @@ lexsingle(struct lexstate *lexer)
skipwhitespace(lexer);
+ ret.token.loc.lineno = lexer->lineno;
+ ret.token.loc.column = lexer->column-1;
+
d = lexgetc(lexer);
- if (d == UINT32_MAX)
+ if (d == UINT32_MAX) {
+ ret.res = Lmore;
return ret;
+ }
c = (u8)d;
- ret.token.loc.lineno = lexer->lineno;
- ret.token.loc.column = lexer->column;
ret.res = Lok;
switch (c) {
case ';':
@@ -103,27 +110,58 @@ lexsingle(struct lexstate *lexer)
}
usize len = lexer->pos-start;
memcpy(str, &lexer->buf[start], len);
- printf("token range from %zu to %zu\n", start, lexer->pos);
- printf("token value = %s\n", str);
enum tokentype t = findkeyword((char *)str);
if (t == Txxx) {
t = Tident;
- printf("identifier! %s\n", str);
- } else {
- printf("keyword! %s\n", tokname(t));
+ /* TODO introduce own string type? smallstr */
+ ret.token.str = strdup((char *)str);
}
ret.token.type = t;
return ret;
}
+ /* An unexpected character... */
+ ret.res = Lerror;
return ret;
}
+/*
+ * Print a "filename:lineno:column: " prefix to stdout (no trailing
+ * newline), so that a message can be appended on the same line.
+ *
+ * filename is only read, hence const-qualified.
+ */
+static void
+printfileloc(const char *filename, struct location loc)
+{
+	printf("%s:%zu:%zu: ", filename, loc.lineno, loc.column);
+}
+
+/*
+ * Debug helper: print the location prefix for tok followed by a one-line
+ * description of the token.
+ */
+static void
+printtoken(struct token tok, char *filename)
+{
+	printfileloc(filename, tok.loc);
+
+	if (tok.type >= Tident) {
+		if (tok.type == Tident) {
+			/* fall back to a placeholder when no string is attached */
+			char *text = tok.str;
+			if (!text)
+				text = "<meh>";
+			printf("ident - %s\n", text);
+			return;
+		}
+		if (tok.type == Tconstant) {
+			printf("constant...\n");
+			return;
+		}
+	}
+
+	/* keywords (types below Tident) and syntax/operators: print the name */
+	printf("%s\n", tokname(tok.type));
+}
+
+/*
+ * Release the string attached to an identifier token; tokens of any
+ * other type are left untouched.
+ *
+ * The token is passed by value, so the caller's copy still holds the
+ * (now freed) pointer afterwards and must not use it again.
+ */
+void
+freetoken(struct token tok)
+{
+	if (tok.type != Tident)
+		return;
+
+	free(tok.str);
+}
+
struct lexresult
lex(struct lexstate *lexer)
{
struct lexresult ret = {0};
+ char filename[] = "../tests/return_2.c";
if (lexer->respos < lexer->nres) {
ret = lexer->results[lexer->respos];
@@ -136,16 +174,24 @@ lex(struct lexstate *lexer)
}
/* lex everything we got in the buffer */
- ret = lexsingle(lexer);
- ret = lexsingle(lexer);
- /* usize i = 0; */
- /* while (true) { */
- /* ret = lexsingle(lexer); */
- /* if (ret.type == Txxx) */
- /* break; */
- /* lexer->results[i++] = ret; */
- /* } */
- /* something non-whitespace */
+ while (true) {
+ ret = lexsingle(lexer);
+ switch (ret.res) {
+ case Lok:
+ /* TODO assign token to lexer buffer */
+ printtoken(ret.token, filename);
+ freetoken(ret.token);
+ break;
+ case Lmore:
+ printf("no more tokens to lex in current input!\n");
+ goto lexout;
+ case Lerror:
+ printf("error: unexpected character in input\n");
+ goto lexout;
+ }
+ }
+lexout:
+ /* TODO return first token from input (if available) */
return ret;
}