From 0de076ace166de49372580c0a8980ab356b6559b Mon Sep 17 00:00:00 2001
From: Schrottkatze
Date: Mon, 3 Jun 2024 12:05:38 +0200
Subject: [PATCH] lang: finish module/top level syntax

---
Review note: the module.rs hunk below was amended during review (`USE` node
kind, markers abandoned on bail-out, item parsed after recovered separators),
so its `index` blob hashes no longer match the patched content.

 crates/lang/src/lst_parser/error.rs          |   2 +
 crates/lang/src/lst_parser/grammar/module.rs | 111 ++++++++++++++++++-
 crates/lang/src/lst_parser/syntax_kind.rs    |   6 ++
 testfiles/test.owo                           |   4 +-
 4 files changed, 120 insertions(+), 3 deletions(-)

diff --git a/crates/lang/src/lst_parser/error.rs b/crates/lang/src/lst_parser/error.rs
index 04f5e7b..73f290f 100644
--- a/crates/lang/src/lst_parser/error.rs
+++ b/crates/lang/src/lst_parser/error.rs
@@ -10,4 +10,6 @@ pub enum SyntaxError {
     CommaInMatOrVec,
     UnterminatedTopLevelItem,
     UnclosedModuleBody,
+    UnfinishedPath,
+    PathSepContainsSemicolon,
 }
diff --git a/crates/lang/src/lst_parser/grammar/module.rs b/crates/lang/src/lst_parser/grammar/module.rs
index fe51d2c..1bce2a4 100644
--- a/crates/lang/src/lst_parser/grammar/module.rs
+++ b/crates/lang/src/lst_parser/grammar/module.rs
@@ -20,6 +20,7 @@ pub fn mod_body(p: &mut Parser) {
 fn mod_decl(p: &mut Parser) -> Option {
     let mod_start = p.start("module");
     if !p.eat(MOD_KW) {
+        mod_start.abandon(p);
         return None;
     }
 
@@ -54,7 +55,7 @@ pub fn top_level_item(p: &mut Parser) -> Option {
     if !TOP_LEVEL_ITEM_START.contains(p.current()) {
         return None;
     }
-    def(p).or_else(|| mod_decl(p))
+    def(p).or_else(|| mod_decl(p)).or_else(|| r#use(p))
 }
 
 fn def(p: &mut Parser) -> Option {
@@ -98,9 +99,115 @@ fn def(p: &mut Parser) -> Option {
 }
 
+/// Parses a `use` item: `USE_KW`, a use pattern, then the terminating `SEMICOLON`.
+///
+/// Returns `None` when the current token is not `USE_KW`. Otherwise returns the
+/// completed `USE` node, or the error marker built on it when the `SEMICOLON` is missing.
 fn r#use(p: &mut Parser) -> Option {
+    let use_start = p.start("use_start");
     if !p.eat(USE_KW) {
+        use_start.abandon(p);
         return None;
     }
 
-    todo!()
+    if use_pat(p).is_none() {
+        p.start("expected_use_pat")
+            .error(p, SyntaxError::Expected(vec![USE_PAT]));
+    }
+
+    // Completed as `USE` rather than `DEF`, so a `use` item is not mistaken for a definition.
+    let use_item = use_start.complete(p, USE);
+    Some(if p.eat(SEMICOLON) {
+        use_item
+    } else if TOP_LEVEL_ITEM_START.contains(p.current()) || p.at(EOF) {
+        // Another item or the end of input follows directly: the `;` is missing.
+        use_item
+            .precede(p, "unterminated_tl_item")
+            .error(p, SyntaxError::UnterminatedTopLevelItem)
+    } else {
+        use_item
+            .precede(p, "err_unexpected")
+            .error(p, SyntaxError::Expected(vec![SEMICOLON]))
+    })
+}
+
+/// Parses the pattern of a `use` item: an `IDENT` followed by any number of
+/// separator/item pairs, up to (but not including) the closing `SEMICOLON`.
+///
+/// Returns `None` when the pattern does not start with an `IDENT`.
+fn use_pat(p: &mut Parser) -> Option {
+    let use_pat_marker = p.start("use_pat");
+    if !p.eat(IDENT) {
+        // Release the marker before bailing out, as `mod_decl` and `r#use` do.
+        use_pat_marker.abandon(p);
+        return None;
+    }
+
+    loop {
+        if use_path_sep(p) {
+            // Every separator, well-formed or recovered, must be followed by an item.
+            if pat_item(p).is_none() {
+                break Some(use_pat_marker.error(p, SyntaxError::UnfinishedPath));
+            }
+        } else if p.at(SEMICOLON) {
+            // A lone `;` ends the pattern; it is left for the caller to consume.
+            break Some(use_pat_marker.complete(p, USE_PAT));
+        } else {
+            break Some(use_pat_marker.error(p, SyntaxError::Expected(vec![PATH_SEP, SEMICOLON])));
+        }
+    }
+}
+
+/// Consumes one path separator and reports whether one was found.
+///
+/// Besides `PATH_SEP` itself, the typos `;:`, `:;` and `;;` are consumed as a
+/// `PATH_SEP` node flagged with `PathSepContainsSemicolon`.
+fn use_path_sep(p: &mut Parser) -> bool {
+    if p.eat(PATH_SEP) {
+        true
+    } else if p.at(SEMICOLON) && p.nth_at(1, COLON) {
+        let broken_sep = p.start("broken_path_sep");
+        let wrong_semi = p.start("semi_typo");
+        p.eat(SEMICOLON);
+        wrong_semi.error(p, SyntaxError::PathSepContainsSemicolon);
+        p.eat(COLON);
+        broken_sep.complete(p, PATH_SEP);
+        true
+    } else if p.at(COLON) && p.nth_at(1, SEMICOLON) {
+        let broken_sep = p.start("broken_path_sep");
+        p.eat(COLON);
+        let wrong_semi = p.start("semi_typo");
+        p.eat(SEMICOLON);
+        wrong_semi.error(p, SyntaxError::PathSepContainsSemicolon);
+        broken_sep.complete(p, PATH_SEP);
+        true
+    } else if p.at(SEMICOLON) && p.nth_at(1, SEMICOLON) {
+        let broken_sep = p.start("broken_path_sep");
+        p.eat(SEMICOLON);
+        p.eat(SEMICOLON);
+        broken_sep
+            .complete(p, PATH_SEP)
+            .precede(p, "semi_typo_err")
+            .error(p, SyntaxError::PathSepContainsSemicolon);
+        true
+    } else {
+        false
+    }
+}
+
+/// Parses the item after a path separator: an `IDENT` (`PAT_ITEM`) or a `STAR`
+/// (`PAT_GLOB`). Returns `None` when the current token starts neither; brace
+/// groups are not implemented yet and hit `todo!`.
+fn pat_item(p: &mut Parser) -> Option {
+    let item_start = p.start("pat_item_start");
+    if p.eat(IDENT) {
+        Some(item_start.complete(p, PAT_ITEM))
+    } else if p.eat(STAR) {
+        Some(item_start.complete(p, PAT_GLOB))
+    } else if p.eat(L_BRACE) {
+        todo!("write PAT_GROUPs")
+    } else {
+        // Release the marker before bailing out, as `mod_decl` and `r#use` do.
+        item_start.abandon(p);
+        None
+    }
 }
diff --git a/crates/lang/src/lst_parser/syntax_kind.rs b/crates/lang/src/lst_parser/syntax_kind.rs
index 7b0fe41..87230e7 100644
--- a/crates/lang/src/lst_parser/syntax_kind.rs
+++ b/crates/lang/src/lst_parser/syntax_kind.rs
@@ -27,14 +27,20 @@ pub enum SyntaxKind {
     #[token("in")]
     IN_KW,
     LET_IN,
+    #[token("::")]
+    PATH_SEP,
     #[token("mod")]
     MOD_KW,
     MODULE,
     MODULE_NAME,
     MODULE_BODY,
+    USE,
     #[token("use")]
     USE_KW,
     USE_PAT,
+    PAT_ITEM,
+    PAT_GLOB,
+    PAT_GROUP,
     #[regex("[\\d]+")]
     INT_NUM,
     #[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)")]
diff --git a/testfiles/test.owo b/testfiles/test.owo
index b50e5c6..f94256a 100644
--- a/testfiles/test.owo
+++ b/testfiles/test.owo
@@ -1,4 +1,6 @@
-def hello_world = meow [ 1 2 ]
+use hello::meow;
+
+def hello_world = meow [ 1 2 ];
 
 def test