pawarser(setup): continue working on the bare basics

This commit is contained in:
Schrottkatze 2024-10-17 09:54:09 +02:00
parent 34ddaacb58
commit 21bcf62ea5
No known key found for this signature in database
5 changed files with 196 additions and 26 deletions

View file

@ -2,18 +2,25 @@ use std::cell::Cell;
use enumset::{EnumSet, EnumSetType}; use enumset::{EnumSet, EnumSetType};
use self::{error::SyntaxError, event::Event, input::Input}; use self::{error::SyntaxError, event::Event, input::Input, marker::Marker};
mod error; mod error;
mod event; mod event;
pub mod input; mod input;
mod marker;
pub struct Parser< /// this is used to define some required SyntaxKinds like an EOF token or an error token
'src, pub trait SyntaxElement
'toks, where
SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, Self: EnumSetType + Into<rowan::SyntaxKind> + Clone,
SyntaxErr: SyntaxError, {
> { /// EOF value. This will be used by the rest of the parser library to represent an EOF.
const EOF: Self;
/// Error value. This will be used as a placeholder for associated respective errors.
const ERROR: Self;
}
pub struct Parser<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
input: Input<'src, 'toks, SyntaxKind>, input: Input<'src, 'toks, SyntaxKind>,
pos: usize, pos: usize,
events: Vec<Event<SyntaxKind, SyntaxErr>>, events: Vec<Event<SyntaxKind, SyntaxErr>>,
@ -21,10 +28,76 @@ pub struct Parser<
steps: Cell<u32>, steps: Cell<u32>,
} }
impl<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>
    Parser<'src, 'toks, SyntaxKind, SyntaxErr>
{
    /// Eat all meaningless tokens at the end of the file.
    ///
    /// Pushes a single [`Event::Eat`] whose count is the length of the meaningless tail
    /// reported by the input.
    pub fn eat_succeeding_meaningless(&mut self) {
        let count = self.input.meaningless_tail_len();
        self.push_ev(Event::Eat { count });
    }

    /// Get token from current position of the parser.
    ///
    /// Equivalent to `self.nth(0)`; counts as one lookahead step.
    pub fn current(&self) -> SyntaxKind {
        self.nth(0)
    }

    /// Open a new node at the current end of the event list.
    ///
    /// A tombstone start event is pushed as a placeholder and a [`Marker`] pointing at it is
    /// returned. `name` is only forwarded to the marker, which uses it in its drop message.
    pub fn start(&mut self, name: &str) -> Marker {
        let pos = self.events.len();
        self.push_ev(Event::tombstone());
        Marker::new(pos, name)
    }

    /// Eat next token if it's of kind `kind` and return `true`.
    /// Otherwise, `false`.
    pub fn eat(&mut self, kind: SyntaxKind) -> bool {
        if !self.at(kind) {
            return false;
        }

        self.do_bump();
        true
    }

    /// Consume the token at the current position and advance by one meaningful token.
    ///
    /// NOTE(review): `Input::preceding_meaningless` asserts that `self.pos` indexes an existing
    /// meaningful token, so bumping at the end of the input (e.g. `eat(SyntaxKind::EOF)`)
    /// panics — confirm that callers never do that.
    fn do_bump(&mut self) {
        let count = self.input.preceding_meaningless(self.pos);
        self.push_ev(Event::Eat { count });
        self.pos += 1;
        // The parser made progress, so the "stuck" detector starts counting from scratch.
        // Without this reset the counter only ever grows and the assertion in `step` would
        // fire on any input that needs more than `step_limit` lookups in total.
        self.steps.set(0);
    }

    /// Check if the token at the current parser position is of `kind`
    pub fn at(&self, kind: SyntaxKind) -> bool {
        self.nth_at(0, kind)
    }

    /// Check if the token that is `n` ahead is of `kind`
    pub fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
        self.nth(n) == kind
    }

    /// Get the kind of the meaningful token that is `n` ahead of the current position.
    ///
    /// Looking past the end of the input yields `SyntaxKind::EOF`. Every call counts as one
    /// lookahead step.
    pub fn nth(&self, n: usize) -> SyntaxKind {
        self.step();
        self.input.kind(self.pos + n)
    }

    /// Append `event` to the event list.
    fn push_ev(&mut self, event: Event<SyntaxKind, SyntaxErr>) {
        self.events.push(event);
    }

    /// Record one lookahead step.
    ///
    /// # Panics
    /// Panics with "the parser seems stuck." once more than `step_limit` lookups happened
    /// without a token being consumed in between.
    fn step(&self) {
        let steps = self.steps.get();
        assert!(steps <= self.step_limit, "the parser seems stuck.");
        self.steps.set(steps + 1);
    }
}
pub struct ParserBuilder< pub struct ParserBuilder<
'src, 'src,
'toks, 'toks,
SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxKind: SyntaxElement,
// SyntaxErr: SyntaxError, // SyntaxErr: SyntaxError,
> { > {
raw_toks: &'toks Vec<(SyntaxKind, &'src str)>, raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
@ -32,9 +105,7 @@ pub struct ParserBuilder<
step_limit: u32, step_limit: u32,
} }
impl<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>> impl<'src, 'toks, SyntaxKind: SyntaxElement> ParserBuilder<'src, 'toks, SyntaxKind> {
ParserBuilder<'src, 'toks, SyntaxKind>
{
pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self { pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self {
Self { Self {
raw_toks, raw_toks,

View file

@ -1,3 +1,7 @@
/// A marker trait... for now! /// A marker trait... for now!
// TODO: constrain that conversion to `NodeKind::Error` is enforced to be possible // TODO: constrain that conversion to `NodeKind::Error` is enforced to be possible
pub trait SyntaxError {} pub trait SyntaxError
where
Self: Clone,
{
}

View file

@ -1,8 +1,8 @@
use enumset::EnumSetType; use enumset::EnumSetType;
use super::error::SyntaxError; use super::{error::SyntaxError, SyntaxElement};
pub enum Event<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError> { pub enum Event<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
Start { Start {
kind: NodeKind<SyntaxKind, SyntaxErr>, kind: NodeKind<SyntaxKind, SyntaxErr>,
forward_parent: Option<usize>, forward_parent: Option<usize>,
@ -13,9 +13,7 @@ pub enum Event<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: Syn
}, },
} }
impl<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError> impl<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> Event<SyntaxKind, SyntaxErr> {
Event<SyntaxKind, SyntaxErr>
{
pub fn tombstone() -> Self { pub fn tombstone() -> Self {
Self::Start { Self::Start {
kind: NodeKind::Tombstone, kind: NodeKind::Tombstone,
@ -24,15 +22,14 @@ impl<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError>
} }
} }
pub enum NodeKind<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError> { #[derive(Clone)]
pub enum NodeKind<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
Tombstone, Tombstone,
Syntax(SyntaxKind), Syntax(SyntaxKind),
Error(SyntaxErr), Error(SyntaxErr),
} }
impl<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError> impl<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> NodeKind<SyntaxKind, SyntaxErr> {
NodeKind<SyntaxKind, SyntaxErr>
{
pub fn is_tombstone(&self) -> bool { pub fn is_tombstone(&self) -> bool {
matches!(self, Self::Tombstone) matches!(self, Self::Tombstone)
} }

View file

@ -1,6 +1,8 @@
use enumset::{EnumSet, EnumSetType}; use enumset::{EnumSet, EnumSetType};
pub struct Input<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>> { use super::SyntaxElement;
pub struct Input<'src, 'toks, SyntaxKind: SyntaxElement> {
raw: &'toks Vec<(SyntaxKind, &'src str)>, raw: &'toks Vec<(SyntaxKind, &'src str)>,
// enumset of meaningless tokens // enumset of meaningless tokens
semantically_meaningless: EnumSet<SyntaxKind>, semantically_meaningless: EnumSet<SyntaxKind>,
@ -8,9 +10,7 @@ pub struct Input<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>>
meaningful_toks: Vec<usize>, meaningful_toks: Vec<usize>,
} }
impl<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>> impl<'src, 'toks, SyntaxKind: SyntaxElement> Input<'src, 'toks, SyntaxKind> {
Input<'src, 'toks, SyntaxKind>
{
pub fn new( pub fn new(
raw_toks: &'toks Vec<(SyntaxKind, &'src str)>, raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
meaningless: Option<EnumSet<SyntaxKind>>, meaningless: Option<EnumSet<SyntaxKind>>,
@ -31,4 +31,28 @@ impl<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>>
meaningful_toks, meaningful_toks,
} }
} }
/// Kind of the `idx`-th meaningful token.
///
/// Returns `SyntaxKind::EOF` when `idx` is past the last meaningful token.
pub fn kind(&self, idx: usize) -> SyntaxKind {
    match self.meaningful_toks.get(idx) {
        // entries of `meaningful_toks` are used as indices into the raw token list
        Some(&raw_idx) => self.raw.get(raw_idx).unwrap().0,
        None => SyntaxKind::EOF,
    }
}
/// Number of raw tokens that have to be consumed to eat the meaningful token at `idx`:
/// the meaningless tokens directly in front of it plus the token itself.
///
/// Assumes `meaningful_toks` holds ascending indices into the raw token list
/// (TODO confirm against the constructor).
///
/// # Panics
/// Panics if `idx` is not a valid index into the meaningful tokens.
pub fn preceding_meaningless(&self, idx: usize) -> usize {
    assert!(self.meaningful_toks.len() > idx);

    match idx.checked_sub(1) {
        // everything after the previous meaningful token, up to and including this one
        Some(prev) => self.meaningful_toks[idx] - self.meaningful_toks[prev],
        // first meaningful token: every raw token from the start of the input through it,
        // so leading meaningless tokens are not lost
        None => self.meaningful_toks[idx] + 1,
    }
}
/// Get the count of meaningless tokens at the end of the file.
///
/// If the input contains no meaningful token at all (empty or only meaningless tokens),
/// every raw token belongs to the tail.
pub fn meaningless_tail_len(&self) -> usize {
    match self.meaningful_toks.last() {
        Some(&last_meaningful) => self.raw.len() - (last_meaningful + 1),
        None => self.raw.len(),
    }
}
} }

View file

@ -0,0 +1,74 @@
use drop_bomb::DropBomb;
use rowan::SyntaxKind;
use super::{
error::SyntaxError,
event::{Event, NodeKind},
Parser, SyntaxElement,
};
/// Handle for a syntax node that has been started but not yet finished.
///
/// Created by `Parser::start`, which pushes a tombstone start event and hands out a marker
/// pointing at it.
pub struct Marker {
/// Index of this marker's start event in the parser's event list.
pos: usize,
/// Drop guard from the `drop_bomb` crate, created with the message
/// "Marker {name} must be completed or abandoned." and defused when the node is closed.
bomb: DropBomb,
}
impl Marker {
    /// Create a marker for the start event at index `pos` of the parser's event list.
    ///
    /// `name` is only used in the message of the drop bomb guarding this marker.
    pub(super) fn new(pos: usize, name: &str) -> Self {
        Self {
            pos,
            bomb: DropBomb::new(format!("Marker {name} must be completed or abandoned.")),
        }
    }

    /// Shared implementation of [`Marker::complete`] and [`Marker::error`].
    ///
    /// Defuses the bomb, writes `kind` into the start event this marker points at, pushes the
    /// matching `Event::Finish` and returns a [`CompletedMarker`] for the closed node.
    fn close_node<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
        mut self,
        p: &mut Parser<SyntaxKind, SyntaxErr>,
        kind: NodeKind<SyntaxKind, SyntaxErr>,
    ) -> CompletedMarker<SyntaxKind, SyntaxErr> {
        self.bomb.defuse();

        match &mut p.events[self.pos] {
            Event::Start { kind: slot, .. } => *slot = kind.clone(),
            // a marker is only ever created for a start event
            _ => unreachable!(),
        }
        p.push_ev(Event::Finish);

        CompletedMarker {
            pos: self.pos,
            kind,
        }
    }

    /// Finish the node as a regular syntax node of kind `kind`.
    pub fn complete<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
        self,
        p: &mut Parser<SyntaxKind, SyntaxErr>,
        kind: SyntaxKind,
    ) -> CompletedMarker<SyntaxKind, SyntaxErr> {
        self.close_node(p, NodeKind::Syntax(kind))
    }

    /// Finish the node as an error node carrying the error `kind`.
    pub fn error<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
        self,
        p: &mut Parser<SyntaxKind, SyntaxErr>,
        kind: SyntaxErr,
    ) -> CompletedMarker<SyntaxKind, SyntaxErr> {
        self.close_node(p, NodeKind::Error(kind))
    }

    /// Give up on the node without completing it.
    ///
    /// The bomb has to be defused here as well, otherwise dropping `self` at the end of this
    /// function would panic on every call. If nothing was pushed since the marker was created,
    /// its tombstone placeholder is still the last event and is removed again; otherwise the
    /// tombstone is left in place.
    pub fn abandon<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
        mut self,
        p: &mut Parser<SyntaxKind, SyntaxErr>,
    ) {
        self.bomb.defuse();

        let is_last_event = self.pos + 1 == p.events.len();
        if is_last_event
            && matches!(
                p.events[self.pos],
                Event::Start {
                    kind: NodeKind::Tombstone,
                    ..
                }
            )
        {
            p.events.pop();
        }
    }
}
/// Handle for a node that has been closed through a `Marker`.
pub struct CompletedMarker<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
/// Index of the node's start event in the parser's event list (taken over from the marker).
pos: usize,
/// Kind the node was closed with: a syntax kind or an error.
kind: NodeKind<SyntaxKind, SyntaxErr>,
}
impl<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> CompletedMarker<SyntaxKind, SyntaxErr> {
/// Not implemented yet.
///
/// NOTE(review): judging by the name and signature this is presumably meant to start a new
/// node that wraps the already completed one — confirm once implemented.
///
/// # Panics
/// Always panics, since the body is `todo!()`.
pub fn precede(self, p: &mut Parser<SyntaxKind, SyntaxErr>, name: &str) -> Marker {
todo!()
}
}