iowo/crates/lang/src/parser.rs

242 lines
9.6 KiB
Rust
Raw Normal View History

2024-04-03 00:08:00 +02:00
use std::ops::Range;
use chumsky::{
error::Rich,
extra,
input::{Stream, ValueInput},
prelude::*,
primitive::just,
recursive::recursive,
select,
span::SimpleSpan,
IterParser, Parser,
};
use indexmap::IndexMap;
use logos::{Logos, Source};
use crate::tokens::Token;
pub mod ast;
use self::ast::{Expr, File};
/// Span type used by this parser; an alias for chumsky's `SimpleSpan`.
type Span = SimpleSpan;
/// A value of type `T` paired with a [`Span`].
type Spanned<T> = (T, Span);
pub fn parse<'src>(src: &'src str) -> ParseResult<File<'_>, Rich<'_, Token<'_>>> {
let toks: Vec<_> = Token::lexer(src)
.spanned()
.into_iter()
.map(|(t, s)| (t.expect("TODO: add lexer error(s)"), Span::from(s)))
.collect();
let tok_stream = Stream::from_iter(toks).spanned((src.len()..src.len()).into());
expr_parser().parse(tok_stream)
}
fn expr_parser<'tokens, 'src: 'tokens, I: ValueInput<'tokens, Token = Token<'src>, Span = Span>>(
) -> impl Parser<'tokens, I, File<'src>, extra::Err<Rich<'tokens, Token<'src>, Span>>> {
let word = select! { Token::Word(word) => word };
let expr = recursive(|expr| {
let var = select! {
Token::VarIdent(name) => (Expr::Var as fn(_) -> _, name),
Token::InputIdent(name) => (Expr::InputVar as fn(_) -> _, name)
}
.map_with(|(item_type, name), extra| item_type((name, extra.span())));
let attrset = word
.map_with(|n, e| (n, e.span()))
.then_ignore(just(Token::Colon))
.then(expr)
.separated_by(just(Token::Comma))
.collect::<Vec<_>>()
.map(IndexMap::from_iter)
.delimited_by(just(Token::BracketOpen), just(Token::BracketClose))
.map_with(|v, e| (v, e.span()));
let node = word
.map_with(|v, e| (v, e.span()))
.then(attrset.clone().or_not())
.map(|(name, params)| Expr::Node(name, params))
.or(var)
.or(attrset.map(Expr::AttrSet));
let pipeline = node
.clone()
.then(choice((
just(Token::Pipe).to(Expr::SimplePipe as fn(_, _) -> _),
just(Token::MappingPipe).to(Expr::MappingPipe as fn(_, _) -> _),
just(Token::NullPipe).to(Expr::NullPipe as fn(_, _) -> _),
)))
.repeated()
.foldr(node, |(curr, pipe), next| {
pipe(Box::new(curr), Box::new(next))
});
pipeline
});
let decl = just(Token::Def).ignore_then(
word.map_with(|n, e| (n, e.span()))
.then_ignore(just(Token::Equals))
.then(expr.clone().map_with(|expr, extra| (expr, extra.span())))
.then_ignore(just(Token::SemiColon)),
);
expr.map_with(|expr, extra| File {
decls: IndexMap::from_iter([(("main", (0..0).into()), (expr, extra.span()))]),
})
.or(decl.repeated().collect::<Vec<_>>().map(|decls| File {
decls: IndexMap::from_iter(decls),
}))
}
#[cfg(test)]
mod tests {
    use crate::parser::ast::{Expr, File};
    use crate::parser::parse;
    use crate::tokens::Token;
    use chumsky::input::Stream;
    use chumsky::prelude::*;
    use indexmap::IndexMap;
    use logos::Logos;

    /// A bare expression is parsed as the single declaration `main` (name span `0..0`),
    /// and a node's bracketed parameters keep their source order and spans.
    #[test]
    fn test_parse_node_with_params() {
        const INPUT: &str = "meow [ hello: $foo, world: @bar]";
        assert_eq!(
            parse(INPUT).unwrap(),
            File {
                decls: IndexMap::from_iter([(
                    ("main", (0..0).into()),
                    (
                        Expr::Node(
                            ("meow", (0..4).into()),
                            Some((
                                IndexMap::from_iter([
                                    (
                                        ("hello", (7..12).into()),
                                        Expr::Var(("foo", (14..18).into()))
                                    ),
                                    (
                                        ("world", (20..25).into()),
                                        Expr::InputVar(("bar", (27..31).into()))
                                    )
                                ]),
                                (5..32).into()
                            ))
                        ),
                        (0..32).into()
                    )
                )])
            }
        );
    }

    /// Several `def` declarations, nested attribute sets and all three pipe kinds;
    /// pipelines are expected to nest to the right.
    #[test]
    fn test_parse_multiple_top_level_complex() {
        // The whitespace inside INPUT is significant: every span asserted below is a byte
        // offset into this exact string, so re-indenting it invalidates the expectations.
        const INPUT: &str = r#"def main = meow
  | uwu
   [ foo: @bar
   , hello: world @| test [ more: params ] | yay
   ]
 !| awa
  @| nya
  | rawr;

def test = meow
  [ hello: $foo
 , world: @bar
  ];
"#;
        assert_eq!(
            parse(INPUT).unwrap(),
            File {
                decls: IndexMap::from_iter([
                    (
                        ("main", (4..8).into()),
                        (
                            Expr::SimplePipe(
                                Box::new(Expr::Node(("meow", (11..15).into()), None)),
                                Box::new(Expr::NullPipe(
                                    Box::new(Expr::Node(
                                        ("uwu", (20..23).into()),
                                        Some((
                                            IndexMap::from_iter([
                                                (
                                                    ("foo", (29..32).into()),
                                                    Expr::InputVar(("bar", (34..38).into()))
                                                ),
                                                (
                                                    ("hello", (44..49).into()),
                                                    Expr::MappingPipe(
                                                        Box::new(Expr::Node(
                                                            ("world", (51..56).into()),
                                                            None
                                                        )),
                                                        Box::new(Expr::SimplePipe(
                                                            Box::new(Expr::Node(
                                                                ("test", (60..64).into()),
                                                                Some((
                                                                    IndexMap::from_iter([(
                                                                        ("more", (67..71).into()),
                                                                        Expr::Node(
                                                                            (
                                                                                "params",
                                                                                (73..79).into()
                                                                            ),
                                                                            None
                                                                        )
                                                                    )]),
                                                                    (65..81).into()
                                                                ))
                                                            )),
                                                            Box::new(Expr::Node(
                                                                ("yay", (84..87).into()),
                                                                None
                                                            ))
                                                        ))
                                                    )
                                                )
                                            ]),
                                            (27..92).into()
                                        ))
                                    )),
                                    Box::new(Expr::MappingPipe(
                                        Box::new(Expr::Node(("awa", (97..100).into()), None)),
                                        Box::new(Expr::SimplePipe(
                                            Box::new(Expr::Node(("nya", (106..109).into()), None)),
                                            Box::new(Expr::Node(("rawr", (114..118).into()), None))
                                        ))
                                    ))
                                ))
                            ),
                            (11..118).into()
                        ),
                    ),
                    (
                        ("test", (125..129).into()),
                        (
                            Expr::Node(
                                ("meow", (132..136).into()),
                                Some((
                                    IndexMap::from_iter([
                                        (
                                            ("hello", (141..146).into()),
                                            Expr::Var(("foo", (148..152).into()))
                                        ),
                                        (
                                            ("world", (156..161).into()),
                                            Expr::InputVar(("bar", (163..167).into()))
                                        )
                                    ]),
                                    (139..171).into()
                                ))
                            ),
                            (132..171).into()
                        )
                    )
                ])
            }
        );
    }
}