lc/lex: add "long" token and lex long integers (HEAD, master)

author: Thomas Bracht Laumann Jespersen <t@laumann.xyz> 2025-02-10 06:47:18 +0100
committer: Thomas Bracht Laumann Jespersen <t@laumann.xyz> 2025-02-10 06:47:46 +0100
commit: 2d230e9bc9bf765d1ec4cba1695f3aa2a312fe8d (patch)
tree: 0f69a027c4742d581034d0c000112935404d9de1
parent: d8321c1947b644888f92795a75645d0c91a6a92d (diff)
5 files changed, 40 insertions, 9 deletions
diff --git a/lc/lib/lex.ml b/lc/lib/lex.ml
index 18072cc..8861898 100644
--- a/lc/lib/lex.ml
+++ b/lc/lib/lex.ml
@@ -2,6 +2,7 @@
 
 let rxident = Str.regexp {|[a-zA-Z_][a-zA-Z0-9_]*\b|}
 let rxconst = Str.regexp {|[0-9]+\b|}
+let rxconstlong = Str.regexp {|\([0-9]+\)[lL]\b|}
 let rxint = Str.regexp {|int\b|}
 let rxvoid = Str.regexp {|void\b|}
 let rxopenpar = Str.regexp {|(|}
@@ -19,6 +20,7 @@ let lex_ident input =
     let token = match s with
       | "void" -> Token.Keyword Token.Void
       | "int" -> Token.Keyword Token.Int
+      | "long" -> Token.Keyword Token.Long
       | "return" -> Token.Keyword Token.Return
       | "if" -> Token.Keyword Token.If
       | "else" -> Token.Keyword Token.Else
@@ -40,13 +42,30 @@ let lex_ident input =
 ;;
 
+(* [lex_const input] lexes an integer constant at the start of [input].
+   On a match returns [Some (token, len, rest)]: the [Token.Constant], the
+   number of characters consumed, and the remaining input; otherwise [None].
+   [Int64.of_string] raises [Failure] if the digits do not fit in an int64. *)
 let lex_const input =
-  if Str.string_match rxconst input 0
+  if Str.string_match rxconstlong input 0
+  then
+    let s = Str.matched_group 1 input in
+    let len = Str.match_end () in
+    let rest = Str.string_after input len in
+    let n = Int64.of_string s in
+    Some (Token.Constant (I64 n), len, rest)
+  else if Str.string_match rxconst input 0
   then
     let s = Str.matched_string input in
     let len = Str.match_end () in
     let rest = Str.string_after input len in
-    let n = int_of_string s in
-    Some (Token.Constant n, len, rest)
+    (* C rule: an unsuffixed decimal constant that does not fit in int is a
+       long, so only narrow to I32 when the round-trip is lossless. *)
+    let n = Int64.of_string s in
+    let n32 = Int64.to_int32 n in
+    let num =
+      if Int64.equal (Int64.of_int32 n32) n then Token.I32 n32 else Token.I64 n
+    in
+    Some (Token.Constant num, len, rest)
   else
     None
 ;;
diff --git a/lc/lib/parse.ml b/lc/lib/parse.ml
index 8af61e6..94b4e37 100644
--- a/lc/lib/parse.ml
+++ b/lc/lib/parse.ml
@@ -270,7 +270,8 @@ let rec parse_exp state min_prec =
 and parse_factor state =
   let* token = next_token state in
   let* factor = match token with
-    | Constant n -> Ok (Ast.Constant n)
+    | Constant num ->
+       Ok (Ast.Constant (number_to_int num))
     | Complement ->
        let* exp' = parse_factor state in
        let ret = Ast.Unary (Ast.Complement, exp') in
@@ -626,7 +627,7 @@ and parse_stmt is_block state =
        in
        let* _ = expect Colon state in
        let* body = parse_lstmt false state in
-       return (Ast.Case ("", const, body))
+       return (Ast.Case ("", (number_to_int const), body))
     | Keyword Switch ->
        take_token state;
        let* _ = expect OpenParen state in
diff --git a/lc/lib/token.ml b/lc/lib/token.ml
index ca7f438..b699608 100644
--- a/lc/lib/token.ml
+++ b/lc/lib/token.ml
@@ -3,6 +3,7 @@
 type keyword =
   | Void
   | Int
+  | Long
   | Return
   | If
   | Else
@@ -18,10 +19,14 @@ type keyword =
   | Extern
   | Static
 
+type number = (* payload of a Constant token, tagged with its width *)
+  | I32 of int32 (* 32-bit value *)
+  | I64 of int64 (* 64-bit value; the lexer emits this for l/L-suffixed constants *)
+
 type token =
   | Keyword of keyword
   | Ident of string
-  | Constant of int
+  | Constant of number
   | OpenParen
   | CloseParen
   | OpenBrace
@@ -67,6 +72,7 @@ type token =
 let keyword_to_string = function
   | Void -> "void"
   | Int -> "int"
+  | Long -> "long"
   | Return -> "return"
   | If -> "if"
   | Else -> "else"
@@ -82,10 +88,18 @@ let keyword_to_string = function
   | Extern -> "extern"
   | Static -> "static"
 
+let number_to_int num = match num with (* either width -> native int *)
+  | I32 small -> Int32.to_int small
+  | I64 wide -> Int64.to_int wide (* NOTE: on 64-bit hosts int is 63 bits, so the top bit is dropped *)
+
+let number_to_string num = match num with (* debug rendering: width tag around the decimal value *)
+  | I32 v -> String.concat "" ["i32("; Int32.to_string v; ")"]
+  | I64 v -> String.concat "" ["i64("; Int64.to_string v; ")"]
+
 let token_desc = function
   | Keyword kw -> keyword_to_string kw
   | Ident ident -> Printf.sprintf "identifier (%s)" ident
-  | Constant n -> Printf.sprintf "constant (%d)\n" n
+  | Constant n -> Printf.sprintf "constant (%s)\n" (number_to_string n)
   | OpenParen -> "("
   | CloseParen -> ")"
   | OpenBrace -> "{"
@@ -131,7 +145,7 @@ let token_desc = function
 let token_to_string = function
   | Keyword kw -> "Keyword " ^ (keyword_to_string kw)
   | Ident ident -> "Ident " ^ ident
-  | Constant n -> "Constant " ^ (string_of_int n)
+  | Constant n -> "Constant " ^ (n |> number_to_string)
   | OpenParen -> "OpenParen"
   | CloseParen -> "CloseParen"
   | OpenBrace -> "OpenBrace"
diff --git a/makefile b/makefile
index 1ed2cff..9324f71 100644
--- a/makefile
+++ b/makefile
@@ -22,6 +22,6 @@ check: driver lc
 
 progress:
 	cd writing-a-c-compiler-tests; \
-	./test_compiler ../driver/_build/install/default/bin/driver --chapter 10 --bitwise --compound --increment --goto --switch
+	./test_compiler ../driver/_build/install/default/bin/driver --chapter 11 --bitwise --compound --increment --goto --switch --stage lex
 
 .PHONY: all lc driver check progress
diff --git a/tests/constlong.c b/tests/constlong.c
new file mode 100644
index 0000000..ae5d6fe
--- /dev/null
+++ b/tests/constlong.c
@@ -0,0 +1,6 @@
+int
+main(void)
+{
+	long x = -9223372036854774574;
+	return (int)x;
+}
author	Thomas Bracht Laumann Jespersen <t@laumann.xyz>	2025-02-10 06:47:18 +0100
committer	Thomas Bracht Laumann Jespersen <t@laumann.xyz>	2025-02-10 06:47:46 +0100
commit	2d230e9bc9bf765d1ec4cba1695f3aa2a312fe8d (patch)
tree	0f69a027c4742d581034d0c000112935404d9de1
parent	d8321c1947b644888f92795a75645d0c91a6a92d (diff)