From 34ddaacb58d81d235f7f9ed2094ef600cdebc7e3 Mon Sep 17 00:00:00 2001
From: Schrottkatze
Date: Sun, 13 Oct 2024 16:47:53 +0200
Subject: [PATCH] pawarser(chore): split up files

---
 crates/pawarser/src/lib.rs          | 166 +---------------------------
 crates/pawarser/src/parser.rs       |  78 +++++++++++++
 crates/pawarser/src/parser/error.rs |   3 +
 crates/pawarser/src/parser/event.rs |  45 ++++++++
 crates/pawarser/src/parser/input.rs |  34 ++++++
 5 files changed, 161 insertions(+), 165 deletions(-)
 create mode 100644 crates/pawarser/src/parser.rs
 create mode 100644 crates/pawarser/src/parser/error.rs
 create mode 100644 crates/pawarser/src/parser/event.rs
 create mode 100644 crates/pawarser/src/parser/input.rs

diff --git a/crates/pawarser/src/lib.rs b/crates/pawarser/src/lib.rs
index 16955d1..92dcd54 100644
--- a/crates/pawarser/src/lib.rs
+++ b/crates/pawarser/src/lib.rs
@@ -1,166 +1,2 @@
 #![feature(iter_collect_into)]
-pub mod parser {
-    use std::cell::Cell;
-
-    use enumset::{EnumSet, EnumSetType};
-
-    use self::{error::SyntaxError, event::Event, input::Input};
-
-    pub mod input {
-        use enumset::{EnumSet, EnumSetType};
-
-        pub struct Input<'src, 'toks, SyntaxKind: EnumSetType + Into> {
-            raw: &'toks Vec<(SyntaxKind, &'src str)>,
-            // enumset of meaningless tokens
-            semantically_meaningless: EnumSet<SyntaxKind>,
-            // indices of non-meaningless tokens
-            meaningful_toks: Vec<usize>,
-        }
-
-        impl<'src, 'toks, SyntaxKind: EnumSetType + Into>
-            Input<'src, 'toks, SyntaxKind>
-        {
-            pub fn new(
-                raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
-                meaningless: Option<EnumSet<SyntaxKind>>,
-            ) -> Self {
-                let mut meaningful_toks = Vec::new();
-
-                if let Some(meaningless) = meaningless {
-                    let meaningful_toks = raw_toks
-                        .iter()
-                        .enumerate()
-                        .filter_map(|(i, tok)| (!meaningless.contains(tok.0)).then_some(i))
-                        .collect_into(&mut meaningful_toks);
-                }
-
-                Self {
-                    raw: raw_toks,
-                    semantically_meaningless: meaningless.unwrap_or_default(),
-                    meaningful_toks,
-                }
-            }
-        }
-    }
-    mod event {
-        use enumset::EnumSetType;
-
-        use super::error::SyntaxError;
-
-        pub enum Event<SyntaxKind: EnumSetType + Into, SyntaxErr: SyntaxError> {
-            Start {
-                kind: NodeKind<SyntaxKind, SyntaxErr>,
-                forward_parent: Option<usize>,
-            },
-            Finish,
-            Eat {
-                count: usize,
-            },
-        }
-
-        impl<SyntaxKind: EnumSetType + Into, SyntaxErr: SyntaxError>
-            Event<SyntaxKind, SyntaxErr>
-        {
-            pub fn tombstone() -> Self {
-                Self::Start {
-                    kind: NodeKind::Tombstone,
-                    forward_parent: None,
-                }
-            }
-        }
-
-        pub enum NodeKind<SyntaxKind: EnumSetType + Into, SyntaxErr: SyntaxError> {
-            Tombstone,
-            Syntax(SyntaxKind),
-            Error(SyntaxErr),
-        }
-
-        impl<SyntaxKind: EnumSetType + Into, SyntaxErr: SyntaxError>
-            NodeKind<SyntaxKind, SyntaxErr>
-        {
-            pub fn is_tombstone(&self) -> bool {
-                matches!(self, Self::Tombstone)
-            }
-            pub fn is_syntax(&self) -> bool {
-                matches!(self, Self::Syntax(_))
-            }
-            pub fn is_error(&self) -> bool {
-                matches!(self, Self::Error(_))
-            }
-        }
-    }
-    mod error {
-        /// A marker trait... for now!
-        // TODO: constrain that conversion to `NodeKind::Error` is enforced to be possible
-        pub trait SyntaxError {}
-    }
-
-    pub struct Parser<
-        'src,
-        'toks,
-        SyntaxKind: EnumSetType + Into,
-        SyntaxErr: SyntaxError,
-    > {
-        input: Input<'src, 'toks, SyntaxKind>,
-        pos: usize,
-        events: Vec<Event<SyntaxKind, SyntaxErr>>,
-        step_limit: u32,
-        steps: Cell<u32>,
-    }
-
-    pub struct ParserBuilder<
-        'src,
-        'toks,
-        SyntaxKind: EnumSetType + Into,
-        // SyntaxErr: SyntaxError,
-    > {
-        raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
-        meaningless_token_kinds: EnumSet<SyntaxKind>,
-        step_limit: u32,
-    }
-
-    impl<'src, 'toks, SyntaxKind: EnumSetType + Into>
-        ParserBuilder<'src, 'toks, SyntaxKind>
-    {
-        pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self {
-            Self {
-                raw_toks,
-                meaningless_token_kinds: EnumSet::new(),
-                step_limit: 4096,
-            }
-        }
-
-        /// Sets the parser step limit.
-        /// Defaults to 4096
-        pub fn step_limit(mut self, new: u32) -> Self {
-            self.step_limit = new;
-            self
-        }
-
-        pub fn add_meaningless(mut self, kind: SyntaxKind) -> Self {
-            self.meaningless_token_kinds.insert(kind);
-            self
-        }
-
-        pub fn add_meaningless_many(mut self, kind: Vec<SyntaxKind>) -> Self {
-            self.meaningless_token_kinds
-                .insert_all(kind.into_iter().collect());
-            self
-        }
-
-        pub fn build(self) -> Parser<'src, 'toks, SyntaxKind, SyntaxErr> {
-            let Self {
-                raw_toks,
-                meaningless_token_kinds,
-                step_limit,
-            } = self;
-            Parser {
-                input: Input::new(raw_toks, Some(meaningless_token_kinds)),
-                pos: 0,
-                events: Vec::new(),
-                step_limit,
-                steps: Cell::new(0),
-            }
-        }
-    }
-}
+pub mod parser;
diff --git a/crates/pawarser/src/parser.rs b/crates/pawarser/src/parser.rs
new file mode 100644
index 0000000..196c776
--- /dev/null
+++ b/crates/pawarser/src/parser.rs
@@ -0,0 +1,78 @@
+use std::cell::Cell;
+
+use enumset::{EnumSet, EnumSetType};
+
+use self::{error::SyntaxError, event::Event, input::Input};
+
+mod error;
+mod event;
+pub mod input;
+
+pub struct Parser<
+    'src,
+    'toks,
+    SyntaxKind: EnumSetType + Into,
+    SyntaxErr: SyntaxError,
+> {
+    input: Input<'src, 'toks, SyntaxKind>,
+    pos: usize,
+    events: Vec<Event<SyntaxKind, SyntaxErr>>,
+    step_limit: u32,
+    steps: Cell<u32>,
+}
+
+pub struct ParserBuilder<
+    'src,
+    'toks,
+    SyntaxKind: EnumSetType + Into,
+    // SyntaxErr: SyntaxError,
+> {
+    raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
+    meaningless_token_kinds: EnumSet<SyntaxKind>,
+    step_limit: u32,
+}
+
+impl<'src, 'toks, SyntaxKind: EnumSetType + Into>
+    ParserBuilder<'src, 'toks, SyntaxKind>
+{
+    pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self {
+        Self {
+            raw_toks,
+            meaningless_token_kinds: EnumSet::new(),
+            step_limit: 4096,
+        }
+    }
+
+    /// Sets the parser step limit.
+    /// Defaults to 4096
+    pub fn step_limit(mut self, new: u32) -> Self {
+        self.step_limit = new;
+        self
+    }
+
+    pub fn add_meaningless(mut self, kind: SyntaxKind) -> Self {
+        self.meaningless_token_kinds.insert(kind);
+        self
+    }
+
+    pub fn add_meaningless_many(mut self, kind: Vec<SyntaxKind>) -> Self {
+        self.meaningless_token_kinds
+            .insert_all(kind.into_iter().collect());
+        self
+    }
+
+    pub fn build(self) -> Parser<'src, 'toks, SyntaxKind, SyntaxErr> {
+        let Self {
+            raw_toks,
+            meaningless_token_kinds,
+            step_limit,
+        } = self;
+        Parser {
+            input: Input::new(raw_toks, Some(meaningless_token_kinds)),
+            pos: 0,
+            events: Vec::new(),
+            step_limit,
+            steps: Cell::new(0),
+        }
+    }
+}
diff --git a/crates/pawarser/src/parser/error.rs b/crates/pawarser/src/parser/error.rs
new file mode 100644
index 0000000..e27c536
--- /dev/null
+++ b/crates/pawarser/src/parser/error.rs
@@ -0,0 +1,3 @@
+/// A marker trait... for now!
+// TODO: constrain that conversion to `NodeKind::Error` is enforced to be possible
+pub trait SyntaxError {}
diff --git a/crates/pawarser/src/parser/event.rs b/crates/pawarser/src/parser/event.rs
new file mode 100644
index 0000000..cb1bed7
--- /dev/null
+++ b/crates/pawarser/src/parser/event.rs
@@ -0,0 +1,45 @@
+use enumset::EnumSetType;
+
+use super::error::SyntaxError;
+
+pub enum Event<SyntaxKind: EnumSetType + Into, SyntaxErr: SyntaxError> {
+    Start {
+        kind: NodeKind<SyntaxKind, SyntaxErr>,
+        forward_parent: Option<usize>,
+    },
+    Finish,
+    Eat {
+        count: usize,
+    },
+}
+
+impl<SyntaxKind: EnumSetType + Into, SyntaxErr: SyntaxError>
+    Event<SyntaxKind, SyntaxErr>
+{
+    pub fn tombstone() -> Self {
+        Self::Start {
+            kind: NodeKind::Tombstone,
+            forward_parent: None,
+        }
+    }
+}
+
+pub enum NodeKind<SyntaxKind: EnumSetType + Into, SyntaxErr: SyntaxError> {
+    Tombstone,
+    Syntax(SyntaxKind),
+    Error(SyntaxErr),
+}
+
+impl<SyntaxKind: EnumSetType + Into, SyntaxErr: SyntaxError>
+    NodeKind<SyntaxKind, SyntaxErr>
+{
+    pub fn is_tombstone(&self) -> bool {
+        matches!(self, Self::Tombstone)
+    }
+    pub fn is_syntax(&self) -> bool {
+        matches!(self, Self::Syntax(_))
+    }
+    pub fn is_error(&self) -> bool {
+        matches!(self, Self::Error(_))
+    }
+}
diff --git a/crates/pawarser/src/parser/input.rs b/crates/pawarser/src/parser/input.rs
new file mode 100644
index 0000000..b148497
--- /dev/null
+++ b/crates/pawarser/src/parser/input.rs
@@ -0,0 +1,34 @@
+use enumset::{EnumSet, EnumSetType};
+
+pub struct Input<'src, 'toks, SyntaxKind: EnumSetType + Into> {
+    raw: &'toks Vec<(SyntaxKind, &'src str)>,
+    // enumset of meaningless tokens
+    semantically_meaningless: EnumSet<SyntaxKind>,
+    // indices of non-meaningless tokens
+    meaningful_toks: Vec<usize>,
+}
+
+impl<'src, 'toks, SyntaxKind: EnumSetType + Into>
+    Input<'src, 'toks, SyntaxKind>
+{
+    pub fn new(
+        raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
+        meaningless: Option<EnumSet<SyntaxKind>>,
+    ) -> Self {
+        let mut meaningful_toks = Vec::new();
+
+        if let Some(meaningless) = meaningless {
+            let meaningful_toks = raw_toks
+                .iter()
+                .enumerate()
+                .filter_map(|(i, tok)| (!meaningless.contains(tok.0)).then_some(i))
+                .collect_into(&mut meaningful_toks);
+        }
+
+        Self {
+            raw: raw_toks,
+            semantically_meaningless: meaningless.unwrap_or_default(),
+            meaningful_toks,
+        }
+    }
+}
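
The core idea in `Input::new` above is that the raw `(SyntaxKind, &str)` tokens stay untouched while the parser only records the indices of tokens whose kind is not in the `semantically_meaningless` EnumSet (e.g. whitespace or comments). A minimal, self-contained sketch of that filtering step, assuming a made-up `TokenKind` enum and token list purely for illustration; stable `collect` stands in for the nightly `collect_into` used in the patch:

use enumset::{enum_set, EnumSet, EnumSetType};

// Hypothetical token kinds; pawarser itself is generic over the user's SyntaxKind.
#[derive(EnumSetType, Debug)]
enum TokenKind {
    Word,
    Number,
    Whitespace,
    Comment,
}

fn main() {
    // Hypothetical lexer output, shaped like pawarser's `raw_toks`: (kind, text) pairs.
    let raw_toks: Vec<(TokenKind, &str)> = vec![
        (TokenKind::Word, "let"),
        (TokenKind::Whitespace, " "),
        (TokenKind::Number, "42"),
        (TokenKind::Comment, "// trailing"),
    ];

    // Token kinds the parser should skip, as an EnumSet.
    let meaningless: EnumSet<TokenKind> = enum_set!(TokenKind::Whitespace | TokenKind::Comment);

    // Same filtering as `Input::new`: keep only the indices of meaningful tokens,
    // leaving the raw token list untouched.
    let meaningful_toks: Vec<usize> = raw_toks
        .iter()
        .enumerate()
        .filter_map(|(i, tok)| (!meaningless.contains(tok.0)).then_some(i))
        .collect();

    assert_eq!(meaningful_toks, vec![0, 2]);
}

In pawarser itself that set is assembled through `ParserBuilder::add_meaningless`/`add_meaningless_many` and handed to `Input::new` as `Some(meaningless_token_kinds)` in `build`.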