From 21bcf62ea53d55d89231f39a5b2d811e37ab1ae1 Mon Sep 17 00:00:00 2001 From: Schrottkatze Date: Thu, 17 Oct 2024 09:54:09 +0200 Subject: [PATCH] pawarser(setup): continue working on the bare basics --- crates/pawarser/src/parser.rs | 95 ++++++++++++++++++++++++---- crates/pawarser/src/parser/error.rs | 6 +- crates/pawarser/src/parser/event.rs | 15 ++--- crates/pawarser/src/parser/input.rs | 32 ++++++++-- crates/pawarser/src/parser/marker.rs | 74 ++++++++++++++++++++++ 5 files changed, 196 insertions(+), 26 deletions(-) create mode 100644 crates/pawarser/src/parser/marker.rs diff --git a/crates/pawarser/src/parser.rs b/crates/pawarser/src/parser.rs index 196c776..0bac65a 100644 --- a/crates/pawarser/src/parser.rs +++ b/crates/pawarser/src/parser.rs @@ -2,18 +2,25 @@ use std::cell::Cell; use enumset::{EnumSet, EnumSetType}; -use self::{error::SyntaxError, event::Event, input::Input}; +use self::{error::SyntaxError, event::Event, input::Input, marker::Marker}; mod error; mod event; -pub mod input; +mod input; +mod marker; -pub struct Parser< - 'src, - 'toks, - SyntaxKind: EnumSetType + Into, - SyntaxErr: SyntaxError, -> { +/// this is used to define some required SyntaxKinds like an EOF token or an error token +pub trait SyntaxElement +where + Self: EnumSetType + Into + Clone, +{ + /// EOF value. This will be used by the rest of the parser library to represent an EOF. + const EOF: Self; + /// Error value. This will be used as a placeholder for associated respective errors. + const ERROR: Self; +} + +pub struct Parser<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> { input: Input<'src, 'toks, SyntaxKind>, pos: usize, events: Vec>, @@ -21,10 +28,76 @@ pub struct Parser< steps: Cell, } +impl<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> + Parser<'src, 'toks, SyntaxKind, SyntaxErr> +{ + /// eat all meaningless tokens at the end of the file. + pub fn eat_succeeding_meaningless(&mut self) { + self.push_ev(Event::Eat { + count: self.input.meaningless_tail_len(), + }); + } + + /// Get token from current position of the parser. + pub fn current(&self) -> SyntaxKind { + self.step(); + self.input.kind(self.pos) + } + + pub fn start(&mut self, name: &str) -> Marker { + let pos = self.events.len(); + self.push_ev(Event::tombstone()); + Marker::new(pos, name) + } + + /// Eat next token if it's of kind `kind` and return `true`. + /// Otherwise, `false`. + pub fn eat(&mut self, kind: SyntaxKind) -> bool { + if !self.at(kind) { + return false; + } + + self.do_bump(); + true + } + + fn do_bump(&mut self) { + self.push_ev(Event::Eat { + count: self.input.preceding_meaningless(self.pos), + }); + self.pos += 1; + } + + /// Check if the token at the current parser position is of `kind` + pub fn at(&self, kind: SyntaxKind) -> bool { + self.nth_at(0, kind) + } + + /// Check if the token that is `n` ahead is of `kind` + pub fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool { + self.nth(n) == kind + } + + pub fn nth(&self, n: usize) -> SyntaxKind { + self.step(); + self.input.kind(self.pos + n) + } + + fn push_ev(&mut self, event: Event) { + self.events.push(event); + } + + fn step(&self) { + let steps = self.steps.get(); + assert!(steps <= self.step_limit, "the parser seems stuck."); + self.steps.set(steps + 1); + } +} + pub struct ParserBuilder< 'src, 'toks, - SyntaxKind: EnumSetType + Into, + SyntaxKind: SyntaxElement, // SyntaxErr: SyntaxError, > { raw_toks: &'toks Vec<(SyntaxKind, &'src str)>, @@ -32,9 +105,7 @@ pub struct ParserBuilder< step_limit: u32, } -impl<'src, 'toks, SyntaxKind: EnumSetType + Into> - ParserBuilder<'src, 'toks, SyntaxKind> -{ +impl<'src, 'toks, SyntaxKind: SyntaxElement> ParserBuilder<'src, 'toks, SyntaxKind> { pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self { Self { raw_toks, diff --git a/crates/pawarser/src/parser/error.rs b/crates/pawarser/src/parser/error.rs index e27c536..ba52ff0 100644 --- a/crates/pawarser/src/parser/error.rs +++ b/crates/pawarser/src/parser/error.rs @@ -1,3 +1,7 @@ /// A marker trait... for now! // TODO: constrain that conversion to `NodeKind::Error` is enforced to be possible -pub trait SyntaxError {} +pub trait SyntaxError +where + Self: Clone, +{ +} diff --git a/crates/pawarser/src/parser/event.rs b/crates/pawarser/src/parser/event.rs index cb1bed7..3cd0ef5 100644 --- a/crates/pawarser/src/parser/event.rs +++ b/crates/pawarser/src/parser/event.rs @@ -1,8 +1,8 @@ use enumset::EnumSetType; -use super::error::SyntaxError; +use super::{error::SyntaxError, SyntaxElement}; -pub enum Event, SyntaxErr: SyntaxError> { +pub enum Event { Start { kind: NodeKind, forward_parent: Option, @@ -13,9 +13,7 @@ pub enum Event, SyntaxErr: Syn }, } -impl, SyntaxErr: SyntaxError> - Event -{ +impl Event { pub fn tombstone() -> Self { Self::Start { kind: NodeKind::Tombstone, @@ -24,15 +22,14 @@ impl, SyntaxErr: SyntaxError> } } -pub enum NodeKind, SyntaxErr: SyntaxError> { +#[derive(Clone)] +pub enum NodeKind { Tombstone, Syntax(SyntaxKind), Error(SyntaxErr), } -impl, SyntaxErr: SyntaxError> - NodeKind -{ +impl NodeKind { pub fn is_tombstone(&self) -> bool { matches!(self, Self::Tombstone) } diff --git a/crates/pawarser/src/parser/input.rs b/crates/pawarser/src/parser/input.rs index b148497..ec2a243 100644 --- a/crates/pawarser/src/parser/input.rs +++ b/crates/pawarser/src/parser/input.rs @@ -1,6 +1,8 @@ use enumset::{EnumSet, EnumSetType}; -pub struct Input<'src, 'toks, SyntaxKind: EnumSetType + Into> { +use super::SyntaxElement; + +pub struct Input<'src, 'toks, SyntaxKind: SyntaxElement> { raw: &'toks Vec<(SyntaxKind, &'src str)>, // enumset of meaningless tokens semantically_meaningless: EnumSet, @@ -8,9 +10,7 @@ pub struct Input<'src, 'toks, SyntaxKind: EnumSetType + Into> meaningful_toks: Vec, } -impl<'src, 'toks, SyntaxKind: EnumSetType + Into> - Input<'src, 'toks, SyntaxKind> -{ +impl<'src, 'toks, SyntaxKind: SyntaxElement> Input<'src, 'toks, SyntaxKind> { pub fn new( raw_toks: &'toks Vec<(SyntaxKind, &'src str)>, meaningless: Option>, @@ -31,4 +31,28 @@ impl<'src, 'toks, SyntaxKind: EnumSetType + Into> meaningful_toks, } } + + pub fn kind(&self, idx: usize) -> SyntaxKind { + let Some(meaningful_idx) = self.meaningful_toks.get(idx) else { + return SyntaxKind::EOF; + }; + + self.raw.get(*meaningful_idx).unwrap().0 + } + + pub fn preceding_meaningless(&self, idx: usize) -> usize { + assert!(self.meaningful_toks.len() > idx); + + if idx == 0 { + // maybe should be `self.meaningful_toks[idx]` instead?? + 1 + } else { + self.meaningful_toks[idx] - self.meaningful_toks[idx - 1] + } + } + + /// get the count of meaningless tokens at the end of the file. + pub fn meaningless_tail_len(&self) -> usize { + self.raw.len() - (self.meaningful_toks.last().unwrap() + 1) + } } diff --git a/crates/pawarser/src/parser/marker.rs b/crates/pawarser/src/parser/marker.rs new file mode 100644 index 0000000..2d3fc5a --- /dev/null +++ b/crates/pawarser/src/parser/marker.rs @@ -0,0 +1,74 @@ +use drop_bomb::DropBomb; +use rowan::SyntaxKind; + +use super::{ + error::SyntaxError, + event::{Event, NodeKind}, + Parser, SyntaxElement, +}; + +pub struct Marker { + pos: usize, + bomb: DropBomb, +} + +impl Marker { + pub(super) fn new(pos: usize, name: &str) -> Self { + Self { + pos, + bomb: DropBomb::new(format!("Marker {name} must be completed or abandoned.")), + } + } + + fn close_node( + mut self, + p: &mut Parser, + kind: NodeKind, + ) -> CompletedMarker { + self.bomb.defuse(); + + match &mut p.events[self.pos] { + Event::Start { kind: slot, .. } => *slot = kind.clone(), + _ => unreachable!(), + } + + p.push_ev(Event::Finish); + CompletedMarker { + pos: self.pos, + kind, + } + } + + pub fn complete( + self, + p: &mut Parser, + kind: SyntaxKind, + ) -> CompletedMarker { + self.close_node(p, NodeKind::Syntax(kind)) + } + + pub fn error( + self, + p: &mut Parser, + kind: SyntaxErr, + ) -> CompletedMarker { + self.close_node(p, NodeKind::Error(kind)) + } + + pub fn abandon( + mut self, + p: &mut Parser, + ) { + } +} + +pub struct CompletedMarker { + pos: usize, + kind: NodeKind, +} + +impl CompletedMarker { + pub fn precede(self, p: &mut Parser, name: &str) -> Marker { + todo!() + } +}