From a3ab844ba7f016e3650e2228a8381aa500ae2add Mon Sep 17 00:00:00 2001
From: Schrottkatze
Date: Sun, 13 Oct 2024 15:32:26 +0200
Subject: [PATCH] pawarser(init): start extracting the parser lib

---
Review notes (ignored by `git am`, not part of the commit message):
- Generic arguments missing from this copy of the patch were restored
  (`Into<rowan::SyntaxKind>`, `EnumSet<SyntaxKind>`, `Vec<usize>`,
  `Option<EnumSet<SyntaxKind>>`). The `Into` target is an assumption based on
  the `rowan` dependency -- please confirm.
- `Input::new` used to leave `meaningful_toks` empty when `meaningless` was
  `None`; `None` is now treated as the empty set, so every token is indexed.
- `Iterator::collect_into` was replaced by `collect`, so the nightly-only
  `#![feature(iter_collect_into)]` attribute is no longer needed.
- The blob hash on the `index` line of `lib.rs` was not recomputed.

 Cargo.lock                 |  9 +++++++++
 Cargo.toml                 |  2 +-
 crates/pawarser/Cargo.toml | 12 ++++++++++++
 crates/pawarser/src/lib.rs | 44 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 crates/pawarser/Cargo.toml
 create mode 100644 crates/pawarser/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 25bef26..4304154 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1159,6 +1159,15 @@ version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
 
+[[package]]
+name = "pawarser"
+version = "0.1.0"
+dependencies = [
+ "drop_bomb",
+ "enumset",
+ "rowan",
+]
+
 [[package]]
 name = "petgraph"
 version = "0.6.5"
diff --git a/Cargo.toml b/Cargo.toml
index 6b774ed..e5c6dc7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,7 +6,7 @@ members = [
     "crates/lang",
     "crates/svg-filters",
     "crates/prowocessing",
-    "crates/executor-poc",
+    "crates/executor-poc", "crates/pawarser",
 ]
 
 resolver = "2"
diff --git a/crates/pawarser/Cargo.toml b/crates/pawarser/Cargo.toml
new file mode 100644
index 0000000..787cb2f
--- /dev/null
+++ b/crates/pawarser/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "pawarser"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+rowan = "0.15.15"
+drop_bomb = "0.1.5"
+enumset = "1.1.3"
+
+[lints]
+workspace = true
diff --git a/crates/pawarser/src/lib.rs b/crates/pawarser/src/lib.rs
new file mode 100644
index 0000000..f1413cb
--- /dev/null
+++ b/crates/pawarser/src/lib.rs
@@ -0,0 +1,44 @@
+pub mod parser {
+    pub mod input {
+        use enumset::{EnumSet, EnumSetType};
+
+        /// A lexed token sequence plus the indices of its meaningful tokens.
+        struct Input<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>> {
+            /// All tokens, each paired with its source text.
+            raw: &'toks Vec<(SyntaxKind, &'src str)>,
+            /// Token kinds that are semantically meaningless.
+            semantically_meaningless: EnumSet<SyntaxKind>,
+            /// Indices into `raw` of the non-meaningless tokens, ascending.
+            meaningful_toks: Vec<usize>,
+        }
+
+        impl<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>>
+            Input<'src, 'toks, SyntaxKind>
+        {
+            /// Builds an `Input` over `raw_toks`.
+            ///
+            /// `meaningless` is the set of token kinds to treat as semantically
+            /// meaningless; `None` is equivalent to the empty set, so every
+            /// token then counts as meaningful.
+            pub fn new(
+                raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
+                meaningless: Option<EnumSet<SyntaxKind>>,
+            ) -> Self {
+                let meaningless = meaningless.unwrap_or_default();
+
+                // Keep only the positions whose token kind is not in the set.
+                let meaningful_toks = raw_toks
+                    .iter()
+                    .enumerate()
+                    .filter_map(|(i, tok)| (!meaningless.contains(tok.0)).then_some(i))
+                    .collect();
+
+                Self {
+                    raw: raw_toks,
+                    semantically_meaningless: meaningless,
+                    meaningful_toks,
+                }
+            }
+        }
+    }
+}