From b8720b2df99e9019691a8e0e08d2dddf28c561e8 Mon Sep 17 00:00:00 2001 From: Schrottkatze Date: Mon, 21 Oct 2024 18:29:46 +0200 Subject: [PATCH] pawarser, json-pawarser: get first debug print working! --- crates/json-pawarser/src/grammar.rs | 13 +++-- crates/json-pawarser/src/lib.rs | 26 +++++++++ crates/json-pawarser/src/syntax_error.rs | 2 +- crates/json-pawarser/src/syntax_kind.rs | 25 ++++++--- crates/pawarser/src/parser.rs | 26 ++++++--- crates/pawarser/src/parser/error.rs | 4 +- crates/pawarser/src/parser/output.rs | 67 ++++++++++++++++++++++++ 7 files changed, 140 insertions(+), 23 deletions(-) create mode 100644 crates/pawarser/src/parser/output.rs diff --git a/crates/json-pawarser/src/grammar.rs b/crates/json-pawarser/src/grammar.rs index 2f1745c..466f22c 100644 --- a/crates/json-pawarser/src/grammar.rs +++ b/crates/json-pawarser/src/grammar.rs @@ -4,8 +4,8 @@ use crate::{syntax_error::SyntaxError, syntax_kind::SyntaxKind}; use self::object::object; -type Parser<'src> = pawarser::Parser<'src, SyntaxKind, SyntaxError>; -type CompletedMarker = pawarser::CompletedMarker; +pub(crate) type Parser<'src> = pawarser::Parser<'src, SyntaxKind, SyntaxError>; +pub(crate) type CompletedMarker = pawarser::CompletedMarker; const BASIC_VALUE_TOKENS: EnumSet = enum_set!(SyntaxKind::BOOL | SyntaxKind::NULL | SyntaxKind::NUMBER | SyntaxKind::STRING); @@ -27,12 +27,15 @@ mod object { pub(super) fn object(p: &mut Parser) -> Option { let obj_start = p.start("object"); - if !p.at(SyntaxKind::BRACE_OPEN) { + if !p.eat(SyntaxKind::BRACE_OPEN) { obj_start.abandon(p); return None; } - todo!() + member(p); + + p.eat(SyntaxKind::BRACE_CLOSE); + Some(obj_start.complete(p, SyntaxKind::OBJECT)) } fn member(p: &mut Parser) -> Option { @@ -46,7 +49,7 @@ mod object { p.eat(SyntaxKind::STRING); member_name_start.complete(p, SyntaxKind::MEMBER_NAME); } else { - return todo!("handle other tokens"); + return todo!("handle other tokens: {:?}", p.current()); } if !p.eat(SyntaxKind::COLON) { diff --git a/crates/json-pawarser/src/lib.rs b/crates/json-pawarser/src/lib.rs index 89160be..05c529e 100644 --- a/crates/json-pawarser/src/lib.rs +++ b/crates/json-pawarser/src/lib.rs @@ -1,3 +1,29 @@ mod grammar; mod syntax_error; mod syntax_kind; + +#[cfg(test)] +mod test { + use pawarser::parser::ParserBuilder; + + use crate::{ + grammar::{value, Parser}, + syntax_kind::{lex, SyntaxKind}, + }; + + #[test] + fn test() { + const TEST_DATA: &str = r#"{"hello_world": "meow"}"#; + let toks = lex(TEST_DATA); + + let mut p: Parser = ParserBuilder::new(toks) + .add_meaningless(SyntaxKind::WHITESPACE) + .add_meaningless(SyntaxKind::NEWLINE) + .build(); + + value(&mut p); + + let out = p.finish(); + assert_eq!("", format!("{:#?}", out)) + } +} diff --git a/crates/json-pawarser/src/syntax_error.rs b/crates/json-pawarser/src/syntax_error.rs index 84377d7..45bb5bd 100644 --- a/crates/json-pawarser/src/syntax_error.rs +++ b/crates/json-pawarser/src/syntax_error.rs @@ -1,6 +1,6 @@ use crate::syntax_kind::SyntaxKind; -#[derive(Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum SyntaxError { DisallowedKeyType(SyntaxKind), MemberMissingValue, diff --git a/crates/json-pawarser/src/syntax_kind.rs b/crates/json-pawarser/src/syntax_kind.rs index 9d3dc2c..dfaab13 100644 --- a/crates/json-pawarser/src/syntax_kind.rs +++ b/crates/json-pawarser/src/syntax_kind.rs @@ -16,14 +16,6 @@ pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> { #[enumset(no_super_impls)] #[allow(non_camel_case_types)] pub enum SyntaxKind { - // Error SyntaxKinds - LEX_ERR, - PARSE_ERR, - - // Meta SyntaxKinds - TOMBSTONE, - EOF, - OBJECT, MEMBER, MEMBER_NAME, @@ -61,6 +53,13 @@ pub enum SyntaxKind { WHITESPACE, #[token("\n")] NEWLINE, + + // Error SyntaxKinds + LEX_ERR, + PARSE_ERR, + + // Meta SyntaxKinds + EOF, } impl pawarser::parser::SyntaxElement for SyntaxKind { @@ -75,6 +74,16 @@ impl From for rowan::SyntaxKind { } } +impl From for SyntaxKind { + fn from(raw: rowan::SyntaxKind) -> Self { + assert!(raw.0 <= SyntaxKind::EOF as u16); + #[allow(unsafe_code, reason = "The transmute is necessary here")] + unsafe { + std::mem::transmute::(raw.0) + } + } +} + #[cfg(test)] mod tests { use crate::syntax_kind::{lex, SyntaxKind}; diff --git a/crates/pawarser/src/parser.rs b/crates/pawarser/src/parser.rs index ccbb5b2..2a666bf 100644 --- a/crates/pawarser/src/parser.rs +++ b/crates/pawarser/src/parser.rs @@ -1,4 +1,4 @@ -use std::{cell::Cell, marker::PhantomData, mem}; +use std::{cell::Cell, fmt, marker::PhantomData, mem}; use enumset::{EnumSet, EnumSetType}; use rowan::{GreenNode, GreenNodeBuilder}; @@ -6,17 +6,24 @@ use rowan::{GreenNode, GreenNodeBuilder}; use crate::parser::event::NodeKind; use self::{event::Event, input::Input, marker::Marker}; -pub use error::SyntaxError; +pub use {error::SyntaxError, output::ParserOutput}; pub mod error; mod event; mod input; pub mod marker; +pub mod output; /// this is used to define some required SyntaxKinds like an EOF token or an error token pub trait SyntaxElement where - Self: EnumSetType + Into + Clone + PartialEq + Eq, + Self: EnumSetType + + Into + + From + + fmt::Debug + + Clone + + PartialEq + + Eq, { /// EOF value. This will be used by the rest of the parser library to represent an EOF. const EOF: Self; @@ -97,15 +104,14 @@ impl<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> self.steps.set(steps + 1); } - pub fn finish( - Self { + pub fn finish(self) -> ParserOutput { + let Self { input, pos, mut events, step_limit, steps, - }: Self, - ) -> ParserOutput { + } = self; let (mut raw_toks, meaningless_tokens) = input.dissolve(); let mut builder = GreenNodeBuilder::new(); // TODO: document what the hell a forward parent is @@ -178,7 +184,11 @@ impl<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> } } - todo!() + ParserOutput { + green_node: builder.finish(), + errors, + _syntax_kind: PhantomData::, + } } } diff --git a/crates/pawarser/src/parser/error.rs b/crates/pawarser/src/parser/error.rs index 07c033d..9c9d893 100644 --- a/crates/pawarser/src/parser/error.rs +++ b/crates/pawarser/src/parser/error.rs @@ -1,7 +1,9 @@ +use std::fmt; + /// A marker trait... for now! // TODO: constrain that conversion to `NodeKind::Error` is enforced to be possible pub trait SyntaxError where - Self: Clone + PartialEq + Eq, + Self: fmt::Debug + Clone + PartialEq + Eq, { } diff --git a/crates/pawarser/src/parser/output.rs b/crates/pawarser/src/parser/output.rs new file mode 100644 index 0000000..bea13e8 --- /dev/null +++ b/crates/pawarser/src/parser/output.rs @@ -0,0 +1,67 @@ +use std::{fmt, marker::PhantomData}; + +use rowan::{GreenNode, GreenNodeData, GreenTokenData, NodeOrToken}; + +use crate::{SyntaxElement, SyntaxError}; + +pub struct ParserOutput { + pub green_node: GreenNode, + pub errors: Vec, + pub(super) _syntax_kind: PhantomData, +} + +impl std::fmt::Debug + for ParserOutput +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut errs: Vec<&SyntaxErr> = self.errors.iter().collect(); + errs.reverse(); + debug_print_output::( + NodeOrToken::Node(&self.green_node), + f, + 0, + &mut errs, + ) + } +} + +fn debug_print_output( + node: NodeOrToken<&GreenNodeData, &GreenTokenData>, + f: &mut std::fmt::Formatter<'_>, + lvl: i32, + errs: &mut Vec<&SyntaxErr>, +) -> std::fmt::Result { + for _ in 0..lvl { + f.write_str(" ")?; + } + + match node { + NodeOrToken::Node(n) => { + let kind: SyntaxKind = node.kind().into(); + if kind != SyntaxKind::ERROR { + writeln!(f, "{:?} {{", kind)?; + } else { + let err = errs + .pop() + .expect("all error syntax nodes should correspond to an error"); + + writeln!(f, "{:?}: {err:?} {{", kind)?; + } + for c in n.children() { + debug_print_output::(c, f, lvl + 1, errs)?; + } + for _ in 0..lvl { + f.write_str(" ")?; + } + f.write_str("}\n") + } + NodeOrToken::Token(t) => { + writeln!( + f, + "{:?} {:?};", + Into::::into(t.kind()), + t.text() + ) + } + } +}