From 946ac879a7fa9135bdbdc5ba73b0738216955244 Mon Sep 17 00:00:00 2001
From: Schrottkatze
Date: Mon, 3 Jun 2024 11:22:36 +0200
Subject: [PATCH] lang: basic module syntax grammar

---
 crates/lang/src/lst_parser.rs                 |   6 +-
 crates/lang/src/lst_parser/error.rs           |   3 +
 crates/lang/src/lst_parser/grammar.rs         |   6 +-
 .../grammar/expression/collection/vec.rs      |  13 ++-
 crates/lang/src/lst_parser/grammar/module.rs  | 116 ++++++++++++++++++
 crates/lang/src/lst_parser/syntax_kind.rs     |   1 +
 testfiles/test.owo                            |   9 +-
 7 files changed, 146 insertions(+), 8 deletions(-)
 create mode 100644 crates/lang/src/lst_parser/grammar/module.rs

diff --git a/crates/lang/src/lst_parser.rs b/crates/lang/src/lst_parser.rs
index a6ec4d7..5c90bb0 100644
--- a/crates/lang/src/lst_parser.rs
+++ b/crates/lang/src/lst_parser.rs
@@ -111,7 +111,7 @@ impl Marker {
         }
     }
 
-    fn complete_node(mut self, p: &mut Parser, kind: NodeKind) -> CompletedMarker {
+    fn close_node(mut self, p: &mut Parser, kind: NodeKind) -> CompletedMarker {
         self.bomb.defuse();
         match &mut p.events[self.pos] {
             Event::Start { kind: slot, .. } => *slot = kind.clone(),
@@ -127,11 +127,11 @@ impl Marker {
     }
 
     pub(crate) fn complete(self, p: &mut Parser<'_, '_>, kind: SyntaxKind) -> CompletedMarker {
-        self.complete_node(p, NodeKind::Syntax(kind))
+        self.close_node(p, NodeKind::Syntax(kind))
     }
 
     pub(crate) fn error(self, p: &mut Parser, kind: SyntaxError) -> CompletedMarker {
-        self.complete_node(p, NodeKind::Error(kind))
+        self.close_node(p, NodeKind::Error(kind))
     }
 
     pub(crate) fn abandon(mut self, p: &mut Parser<'_, '_>) {
diff --git a/crates/lang/src/lst_parser/error.rs b/crates/lang/src/lst_parser/error.rs
index 7fa4431..04f5e7b 100644
--- a/crates/lang/src/lst_parser/error.rs
+++ b/crates/lang/src/lst_parser/error.rs
@@ -7,4 +7,7 @@ pub enum SyntaxError {
     // if there was two space seperated items in a list
     SpaceSepInList,
     SemicolonInList,
+    CommaInMatOrVec,
+    UnterminatedTopLevelItem,
+    UnclosedModuleBody,
 }
diff --git a/crates/lang/src/lst_parser/grammar.rs b/crates/lang/src/lst_parser/grammar.rs
index 9d081bb..2db0824 100644
--- a/crates/lang/src/lst_parser/grammar.rs
+++ b/crates/lang/src/lst_parser/grammar.rs
@@ -2,6 +2,8 @@ use std::fmt::Debug;
 
 use crate::lst_parser::syntax_kind::SyntaxKind::*;
 
+use self::module::{mod_body, top_level_item};
+
 use super::{
     input::Input,
     output::Output,
@@ -10,11 +12,13 @@ use super::{
 };
 
 mod expression;
+mod module;
 
 pub fn source_file(p: &mut Parser) {
     let root = p.start("root");
 
-    expression::expression(p, false);
+    mod_body(p);
+    // expression::expression(p, false);
     p.eat_succeeding_ws();
 
     root.complete(p, ROOT);
diff --git a/crates/lang/src/lst_parser/grammar/expression/collection/vec.rs b/crates/lang/src/lst_parser/grammar/expression/collection/vec.rs
index 385723b..4dfd299 100644
--- a/crates/lang/src/lst_parser/grammar/expression/collection/vec.rs
+++ b/crates/lang/src/lst_parser/grammar/expression/collection/vec.rs
@@ -25,7 +25,6 @@ pub fn vec_matrix_list(p: &mut Parser) -> CompletedMarker {
     }
 }
 
-// TODO: handle semicolons, other wrong toks
 fn finish_list(p: &mut Parser, list_start: Marker) -> CompletedMarker {
     loop {
         if p.eat(COMMA) {
@@ -41,11 +40,11 @@ fn finish_list(p: &mut Parser, list_start: Marker) -> CompletedMarker {
             item.precede(p, "next_item")
                 .complete(p, COLLECTION_ITEM)
                 .precede(p, "err_space_sep")
-                .complete_err(p, SyntaxError::SpaceSepInList);
+                .error(p, SyntaxError::SpaceSepInList);
         } else if p.at(SEMICOLON) {
             let semi_err = p.start("semicolon_err");
             p.eat(SEMICOLON);
-            semi_err.complete_err(p, SyntaxError::SemicolonInList);
+            semi_err.error(p, SyntaxError::SemicolonInList);
             if let Some(item) = atom(p) {
                 item.precede(p, "coll_item_start")
                     .complete(p, COLLECTION_ITEM);
@@ -85,6 +84,14 @@ fn finish_mat_or_vec(p: &mut Parser, coll_start: Marker, mut row_start: Marker)
                 p.eat(R_BRACK);
                 return coll_start.complete(p, VEC);
             }
+        } else if p.at(COMMA) {
+            let err_unexpected_comma = p.start("err_unexpected_comma");
+            p.do_bump();
+            err_unexpected_comma.error(p, SyntaxError::CommaInMatOrVec);
+        } else {
+            let err_unexpected = p.start("err_unexpected_tok");
+            p.do_bump();
+            err_unexpected.error(p, SyntaxError::Expected(vec![EXPR, SEMICOLON, R_BRACK]));
         }
     }
 }
diff --git a/crates/lang/src/lst_parser/grammar/module.rs b/crates/lang/src/lst_parser/grammar/module.rs
new file mode 100644
index 0000000..fe51d2c
--- /dev/null
+++ b/crates/lang/src/lst_parser/grammar/module.rs
@@ -0,0 +1,116 @@
+use enumset::enum_set;
+
+use crate::lst_parser::{
+    error::SyntaxError,
+    grammar::expression::expression,
+    syntax_kind::{SyntaxKind::*, TokenSet},
+    CompletedMarker, Parser,
+};
+
+/// Keywords that can start a top level item.
+const TOP_LEVEL_ITEM_START: TokenSet = enum_set!(DEF_KW | MOD_KW | USE_KW);
+
+/// Parses top level items until `top_level_item` returns `None`.
+pub fn mod_body(p: &mut Parser) {
+    while top_level_item(p).is_some() {}
+}
+
+/// Parses `mod <name>;` or `mod <name> { <items> }`.
+///
+/// Returns `None` if the parser is not at the `mod` keyword.
+fn mod_decl(p: &mut Parser) -> Option<CompletedMarker> {
+    let mod_start = p.start("module");
+    if !p.eat(MOD_KW) {
+        // abandon the unused marker, as `def` does, instead of dropping it
+        mod_start.abandon(p);
+        return None;
+    }
+
+    let mod_name = p.start("module_name");
+    if p.eat(IDENT) {
+        mod_name.complete(p, MODULE_NAME);
+    } else {
+        mod_name.error(p, SyntaxError::Expected(vec![IDENT]));
+    }
+
+    let mod_body_marker = p.start("mod_body");
+    if p.eat(SEMICOLON) {
+        mod_body_marker.abandon(p);
+        Some(mod_start.complete(p, MODULE))
+    } else if p.eat(L_BRACE) {
+        mod_body(p);
+        if !p.eat(R_BRACE) {
+            mod_body_marker
+                .complete(p, MODULE_BODY)
+                .precede(p, "unclosed_mod_body_err")
+                .error(p, SyntaxError::UnclosedModuleBody);
+        } else {
+            mod_body_marker.complete(p, MODULE_BODY);
+        }
+        Some(mod_start.complete(p, MODULE))
+    } else {
+        // no body was parsed, so the body marker must not stay open either
+        mod_body_marker.abandon(p);
+        Some(mod_start.error(p, SyntaxError::Expected(vec![MODULE_BODY])))
+    }
+}
+
+/// Parses a single top level item, trying `def` first and then `mod`.
+///
+/// `use` is in the start set but has no working parser yet, so it yields `None`.
+pub fn top_level_item(p: &mut Parser) -> Option<CompletedMarker> {
+    if !TOP_LEVEL_ITEM_START.contains(p.current()) {
+        return None;
+    }
+    def(p).or_else(|| mod_decl(p))
+}
+
+/// Parses `def <name> = <expression>;`, wrapping missing pieces in error nodes.
+fn def(p: &mut Parser) -> Option<CompletedMarker> {
+    let def_start = p.start("top_level_def");
+    if !p.eat(DEF_KW) {
+        def_start.abandon(p);
+        return None;
+    }
+
+    let def_name = p.start("def_name");
+    if p.eat(IDENT) {
+        def_name.complete(p, DEF_NAME);
+    } else {
+        def_name.error(p, SyntaxError::Expected(vec![IDENT]));
+    }
+
+    let maybe_expected_eq = p.start("maybe_expect_eq");
+    if !p.eat(EQ) {
+        maybe_expected_eq.error(p, SyntaxError::Expected(vec![EQ]));
+    } else {
+        maybe_expected_eq.abandon(p);
+    }
+
+    let body = p.start("def_body");
+    if expression(p, false).is_some() {
+        body.complete(p, DEF_BODY);
+    } else {
+        body.error(p, SyntaxError::Expected(vec![DEF_BODY]));
+    }
+
+    let def = def_start.complete(p, DEF);
+    Some(if p.eat(SEMICOLON) {
+        def
+    } else if TOP_LEVEL_ITEM_START.contains(p.current()) || p.at(EOF) {
+        def.precede(p, "unterminated_tl_item")
+            .error(p, SyntaxError::UnterminatedTopLevelItem)
+    } else {
+        def.precede(p, "err_unexpected")
+            .error(p, SyntaxError::Expected(vec![SEMICOLON]))
+    })
+}
+
+/// Placeholder for `use` items; not wired into `top_level_item` yet.
+fn r#use(p: &mut Parser) -> Option<CompletedMarker> {
+    if !p.eat(USE_KW) {
+        return None;
+    }
+
+    todo!()
+}
diff --git a/crates/lang/src/lst_parser/syntax_kind.rs b/crates/lang/src/lst_parser/syntax_kind.rs
index 4914aeb..7b0fe41 100644
--- a/crates/lang/src/lst_parser/syntax_kind.rs
+++ b/crates/lang/src/lst_parser/syntax_kind.rs
@@ -30,6 +30,7 @@ pub enum SyntaxKind {
     #[token("mod")]
     MOD_KW,
     MODULE,
+    MODULE_NAME,
     MODULE_BODY,
     #[token("use")]
     USE_KW,
diff --git a/testfiles/test.owo b/testfiles/test.owo
index ac1af18..b50e5c6 100644
--- a/testfiles/test.owo
+++ b/testfiles/test.owo
@@ -1 +1,8 @@
-meow | gay |
+def hello_world = meow [ 1 2 ]
+
+def test
+
+mod hello {
+    def meow = uwu;
+}
+