From ec2ff5778b9161fe5805dade2c517177bd8f27ef Mon Sep 17 00:00:00 2001
From: Schrottkatze
Date: Sun, 13 Oct 2024 16:44:59 +0200
Subject: [PATCH] pawarser(setup): basic parser stuff and types around it. also, a builder.

---
Review notes (commentary only; this section sits between the `---` marker
and the diffstat and is not part of the diff itself):

* What the patch does, as far as the hunks below show:
  - imports `Cell`, `EnumSet`/`EnumSetType` and the sibling items
    `SyntaxError`, `Event`, `Input` into `parser`;
  - changes `input::Input` from a private struct to `pub struct`;
  - adds a private module `event` with the enums `Event` (`Start`,
    `Finish`, `Eat`) and `NodeKind` (`Tombstone`, `Syntax`, `Error`), plus
    `Event::tombstone()` and the `is_tombstone` / `is_syntax` / `is_error`
    predicates on `NodeKind`;
  - adds a private module `error` holding the empty trait `SyntaxError`;
  - adds `Parser` (fields: `input`, `pos`, `events`, `step_limit`, `steps`)
    and `ParserBuilder` (fields: `raw_toks`, `meaningless_token_kinds`,
    `step_limit`);
  - `ParserBuilder::new` starts with an empty `EnumSet` and a step limit of
    4096; `step_limit`, `add_meaningless` and `add_meaningless_many` each
    take `self` by value and return it; `build` creates a `Parser` with
    `pos: 0`, an empty `events` vector and `steps: Cell::new(0)`, passing
    the collected kinds to `Input::new` as `Some(..)`.

* NOTE(review): this copy of the patch appears to have lost text that was
  enclosed in angle brackets. Several generic argument lists are visibly
  truncated (`Into>`, `Vec>`, `Option,`, `Cell,`, `EnumSet,`,
  `pub enum Event, SyntaxErr: SyntaxError>`, `impl, SyntaxErr: ...>`), and
  the `From:` header has no address. The code below is therefore not valid
  Rust as shown; recover the missing tokens from commit ec2ff57 rather than
  guessing them.

* NOTE(review): `ParserBuilder::build` names `SyntaxErr` in its return
  type, but the visible `impl<'src, 'toks, SyntaxKind: ...>` header does
  not declare it and the parameter is commented out on `ParserBuilder`.
  Unless it is introduced in text missing from this copy, that name is
  unresolved -- TODO confirm against the original commit.

* NOTE(review): `Input::new` and the rest of `impl Input` lie in the part
  of the file skipped between the two hunks, so their signature is not
  visible here.

 crates/pawarser/src/lib.rs | 129 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 128 insertions(+), 1 deletion(-)

diff --git a/crates/pawarser/src/lib.rs b/crates/pawarser/src/lib.rs
index f1413cb..16955d1 100644
--- a/crates/pawarser/src/lib.rs
+++ b/crates/pawarser/src/lib.rs
@@ -1,9 +1,15 @@
 #![feature(iter_collect_into)]
 pub mod parser {
+    use std::cell::Cell;
+
+    use enumset::{EnumSet, EnumSetType};
+
+    use self::{error::SyntaxError, event::Event, input::Input};
+
     pub mod input {
         use enumset::{EnumSet, EnumSetType};
 
-        struct Input<'src, 'toks, SyntaxKind: EnumSetType + Into> {
+        pub struct Input<'src, 'toks, SyntaxKind: EnumSetType + Into> {
             raw: &'toks Vec<(SyntaxKind, &'src str)>,
             // enumset of meaningless tokens
             semantically_meaningless: EnumSet,
@@ -36,4 +42,125 @@ pub mod parser {
             }
         }
     }
+    mod event {
+        use enumset::EnumSetType;
+
+        use super::error::SyntaxError;
+
+        pub enum Event, SyntaxErr: SyntaxError> {
+            Start {
+                kind: NodeKind,
+                forward_parent: Option,
+            },
+            Finish,
+            Eat {
+                count: usize,
+            },
+        }
+
+        impl, SyntaxErr: SyntaxError>
+            Event
+        {
+            pub fn tombstone() -> Self {
+                Self::Start {
+                    kind: NodeKind::Tombstone,
+                    forward_parent: None,
+                }
+            }
+        }
+
+        pub enum NodeKind, SyntaxErr: SyntaxError> {
+            Tombstone,
+            Syntax(SyntaxKind),
+            Error(SyntaxErr),
+        }
+
+        impl, SyntaxErr: SyntaxError>
+            NodeKind
+        {
+            pub fn is_tombstone(&self) -> bool {
+                matches!(self, Self::Tombstone)
+            }
+            pub fn is_syntax(&self) -> bool {
+                matches!(self, Self::Syntax(_))
+            }
+            pub fn is_error(&self) -> bool {
+                matches!(self, Self::Error(_))
+            }
+        }
+    }
+    mod error {
+        /// A marker trait... for now!
+        // TODO: constrain that conversion to `NodeKind::Error` is enforced to be possible
+        pub trait SyntaxError {}
+    }
+
+    pub struct Parser<
+        'src,
+        'toks,
+        SyntaxKind: EnumSetType + Into,
+        SyntaxErr: SyntaxError,
+    > {
+        input: Input<'src, 'toks, SyntaxKind>,
+        pos: usize,
+        events: Vec>,
+        step_limit: u32,
+        steps: Cell,
+    }
+
+    pub struct ParserBuilder<
+        'src,
+        'toks,
+        SyntaxKind: EnumSetType + Into,
+        // SyntaxErr: SyntaxError,
+    > {
+        raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
+        meaningless_token_kinds: EnumSet,
+        step_limit: u32,
+    }
+
+    impl<'src, 'toks, SyntaxKind: EnumSetType + Into>
+        ParserBuilder<'src, 'toks, SyntaxKind>
+    {
+        pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self {
+            Self {
+                raw_toks,
+                meaningless_token_kinds: EnumSet::new(),
+                step_limit: 4096,
+            }
+        }
+
+        /// Sets the parser step limit.
+        /// Defaults to 4096
+        pub fn step_limit(mut self, new: u32) -> Self {
+            self.step_limit = new;
+            self
+        }
+
+        pub fn add_meaningless(mut self, kind: SyntaxKind) -> Self {
+            self.meaningless_token_kinds.insert(kind);
+            self
+        }
+
+        pub fn add_meaningless_many(mut self, kind: Vec) -> Self {
+            self.meaningless_token_kinds
+                .insert_all(kind.into_iter().collect());
+            self
+        }
+
+        pub fn build(self) -> Parser<'src, 'toks, SyntaxKind, SyntaxErr> {
+            let Self {
+                raw_toks,
+                meaningless_token_kinds,
+                step_limit,
+            } = self;
+            Parser {
+                input: Input::new(raw_toks, Some(meaningless_token_kinds)),
+                pos: 0,
+                events: Vec::new(),
+                step_limit,
+                steps: Cell::new(0),
+            }
+        }
+    }
 }