diff --git a/Cargo.lock b/Cargo.lock
index 25bef26..4304154 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1159,6 +1159,15 @@ version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
 
+[[package]]
+name = "pawarser"
+version = "0.1.0"
+dependencies = [
+ "drop_bomb",
+ "enumset",
+ "rowan",
+]
+
 [[package]]
 name = "petgraph"
 version = "0.6.5"
diff --git a/Cargo.toml b/Cargo.toml
index 6b774ed..e5c6dc7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,7 +6,7 @@ members = [
     "crates/lang",
     "crates/svg-filters",
     "crates/prowocessing",
-    "crates/executor-poc",
+    "crates/executor-poc", "crates/pawarser",
 ]
 resolver = "2"
 
diff --git a/crates/pawarser/Cargo.toml b/crates/pawarser/Cargo.toml
new file mode 100644
index 0000000..787cb2f
--- /dev/null
+++ b/crates/pawarser/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "pawarser"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+rowan = "0.15.15"
+drop_bomb = "0.1.5"
+enumset = "1.1.3"
+
+[lints]
+workspace = true
diff --git a/crates/pawarser/src/lib.rs b/crates/pawarser/src/lib.rs
new file mode 100644
index 0000000..92dcd54
--- /dev/null
+++ b/crates/pawarser/src/lib.rs
@@ -0,0 +1,2 @@
+#![feature(iter_collect_into)]
+pub mod parser;
diff --git a/crates/pawarser/src/parser.rs b/crates/pawarser/src/parser.rs
new file mode 100644
index 0000000..196c776
--- /dev/null
+++ b/crates/pawarser/src/parser.rs
@@ -0,0 +1,90 @@
+use std::cell::Cell;
+
+use enumset::{EnumSet, EnumSetType};
+
+use self::{error::SyntaxError, event::Event, input::Input};
+
+mod error;
+mod event;
+pub mod input;
+
+// NOTE(review): the `Into<..>` target in the `SyntaxKind` bounds was reconstructed as
+// `rowan::SyntaxKind`; confirm against the original change.
+
+/// Parser state: the token input, a cursor position, the recorded events and a step
+/// counter together with its limit.
+pub struct Parser<
+    'src,
+    'toks,
+    SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>,
+    SyntaxErr: SyntaxError,
+> {
+    input: Input<'src, 'toks, SyntaxKind>,
+    pos: usize,
+    events: Vec<Event<SyntaxKind, SyntaxErr>>,
+    step_limit: u32,
+    steps: Cell<u32>,
+}
+
+/// Builder for [`Parser`].
+pub struct ParserBuilder<
+    'src,
+    'toks,
+    SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>,
+    // SyntaxErr: SyntaxError,
+> {
+    raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
+    meaningless_token_kinds: EnumSet<SyntaxKind>,
+    step_limit: u32,
+}
+
+impl<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>>
+    ParserBuilder<'src, 'toks, SyntaxKind>
+{
+    /// Creates a builder over `raw_toks` with no meaningless token kinds and a step
+    /// limit of 4096.
+    pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self {
+        Self {
+            raw_toks,
+            meaningless_token_kinds: EnumSet::new(),
+            step_limit: 4096,
+        }
+    }
+
+    /// Sets the parser step limit.
+    /// Defaults to 4096
+    pub fn step_limit(mut self, new: u32) -> Self {
+        self.step_limit = new;
+        self
+    }
+
+    /// Adds `kind` to the set of meaningless token kinds.
+    pub fn add_meaningless(mut self, kind: SyntaxKind) -> Self {
+        self.meaningless_token_kinds.insert(kind);
+        self
+    }
+
+    /// Adds every kind in `kind` to the set of meaningless token kinds.
+    pub fn add_meaningless_many(mut self, kind: Vec<SyntaxKind>) -> Self {
+        self.meaningless_token_kinds
+            .insert_all(kind.into_iter().collect());
+        self
+    }
+
+    /// Consumes the builder and creates a [`Parser`] positioned at 0, with no
+    /// recorded events and a step count of zero.
+    pub fn build<SyntaxErr: SyntaxError>(self) -> Parser<'src, 'toks, SyntaxKind, SyntaxErr> {
+        let Self {
+            raw_toks,
+            meaningless_token_kinds,
+            step_limit,
+        } = self;
+        Parser {
+            input: Input::new(raw_toks, Some(meaningless_token_kinds)),
+            pos: 0,
+            events: Vec::new(),
+            step_limit,
+            steps: Cell::new(0),
+        }
+    }
+}
diff --git a/crates/pawarser/src/parser/error.rs b/crates/pawarser/src/parser/error.rs
new file mode 100644
index 0000000..e27c536
--- /dev/null
+++ b/crates/pawarser/src/parser/error.rs
@@ -0,0 +1,3 @@
+/// A marker trait... for now!
+// TODO: constrain that conversion to `NodeKind::Error` is enforced to be possible
+pub trait SyntaxError {}
diff --git a/crates/pawarser/src/parser/event.rs b/crates/pawarser/src/parser/event.rs
new file mode 100644
index 0000000..cb1bed7
--- /dev/null
+++ b/crates/pawarser/src/parser/event.rs
@@ -0,0 +1,57 @@
+use enumset::EnumSetType;
+
+use super::error::SyntaxError;
+
+// NOTE(review): generic arguments were reconstructed (`Into<rowan::SyntaxKind>`,
+// `Option<usize>`); confirm against the original change.
+
+/// An event recorded in the parser's event list.
+pub enum Event<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError> {
+    Start {
+        kind: NodeKind<SyntaxKind, SyntaxErr>,
+        forward_parent: Option<usize>,
+    },
+    Finish,
+    Eat {
+        count: usize,
+    },
+}
+
+impl<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError>
+    Event<SyntaxKind, SyntaxErr>
+{
+    /// Returns a `Start` event whose kind is [`NodeKind::Tombstone`] and that has no
+    /// forward parent.
+    pub fn tombstone() -> Self {
+        Self::Start {
+            kind: NodeKind::Tombstone,
+            forward_parent: None,
+        }
+    }
+}
+
+/// The kind carried by [`Event::Start`]: a tombstone, a syntax kind or a syntax error.
+pub enum NodeKind<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError> {
+    Tombstone,
+    Syntax(SyntaxKind),
+    Error(SyntaxErr),
+}
+
+impl<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError>
+    NodeKind<SyntaxKind, SyntaxErr>
+{
+    /// Returns `true` for [`NodeKind::Tombstone`].
+    pub fn is_tombstone(&self) -> bool {
+        matches!(self, Self::Tombstone)
+    }
+
+    /// Returns `true` for [`NodeKind::Syntax`].
+    pub fn is_syntax(&self) -> bool {
+        matches!(self, Self::Syntax(_))
+    }
+
+    /// Returns `true` for [`NodeKind::Error`].
+    pub fn is_error(&self) -> bool {
+        matches!(self, Self::Error(_))
+    }
+}
diff --git a/crates/pawarser/src/parser/input.rs b/crates/pawarser/src/parser/input.rs
new file mode 100644
index 0000000..b148497
--- /dev/null
+++ b/crates/pawarser/src/parser/input.rs
@@ -0,0 +1,36 @@
+use enumset::{EnumSet, EnumSetType};
+
+/// Token input for the parser: the raw token list plus an index of its meaningful tokens.
+pub struct Input<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>> {
+    raw: &'toks Vec<(SyntaxKind, &'src str)>,
+    // enumset of meaningless tokens
+    semantically_meaningless: EnumSet<SyntaxKind>,
+    // indices of non-meaningless tokens
+    meaningful_toks: Vec<usize>,
+}
+
+impl<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>>
+    Input<'src, 'toks, SyntaxKind>
+{
+    /// Creates an input over `raw_toks`.
+    ///
+    /// Tokens whose kind is in `meaningless` are left out of the meaningful-token index.
+    /// `None` is treated as the empty set, so every token counts as meaningful.
+    pub fn new(
+        raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
+        meaningless: Option<EnumSet<SyntaxKind>>,
+    ) -> Self {
+        let semantically_meaningless = meaningless.unwrap_or_default();
+        let meaningful_toks = raw_toks
+            .iter()
+            .enumerate()
+            .filter_map(|(i, tok)| (!semantically_meaningless.contains(tok.0)).then_some(i))
+            .collect();
+
+        Self {
+            raw: raw_toks,
+            semantically_meaningless,
+            meaningful_toks,
+        }
+    }
+}