lang: massive amounts of parser and ast pain

2026-07-21 17:31:16 +02:00 · 2024-04-11 03:23:03 +02:00 · 2024-04-11 03:23:03 +02:00 · 9da157ff4a
commit 9da157ff4a
parent 881a987b2f
16 changed files with 901 additions and 171 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -174,9 +174,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

 [[package]]
 name = "chumsky"
-version = "1.0.0-alpha.6"
+version = "1.0.0-alpha.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9c28d4e5dd9a9262a38b231153591da6ce1471b818233f4727985d3dd0ed93c"
+checksum = "c7b80276986f86789dc56ca6542d53bba9cda3c66091ebbe7bd96fc1bdf20f1f"
 dependencies = [
 "hashbrown",
 "regex-automata",
@ -237,6 +237,12 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"

+[[package]]
+name = "countme"
+version = "3.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636"
+
 [[package]]
 name = "crc32fast"
 version = "1.3.2"
@ -322,6 +328,12 @@ dependencies = [
 "windows-sys 0.48.0",
 ]

+[[package]]
+name = "ego-tree"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
+
 [[package]]
 name = "either"
 version = "1.9.0"
@ -506,9 +518,11 @@ dependencies = [
 "ariadne",
 "chumsky",
 "clap",
+ "ego-tree",
 "indexmap",
 "logos",
 "petgraph",
+ "rowan",
 ]

 [[package]]
@ -589,6 +603,15 @@ version = "2.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"

+[[package]]
+name = "memoffset"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+dependencies = [
+ "autocfg",
+]
+
 [[package]]
 name = "miniz_oxide"
 version = "0.7.1"
@ -863,6 +886,25 @@ dependencies = [
 "serde_derive",
 ]

+[[package]]
+name = "rowan"
+version = "0.15.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a58fa8a7ccff2aec4f39cc45bf5f985cec7125ab271cf681c279fd00192b49"
+dependencies = [
+ "countme",
+ "hashbrown",
+ "memoffset",
+ "rustc-hash",
+ "text-size",
+]
+
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
 [[package]]
 name = "ryu"
 version = "1.0.16"
@ -973,6 +1015,12 @@ dependencies = [
 "unicode-ident",
 ]

+[[package]]
+name = "text-size"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233"
+
 [[package]]
 name = "thiserror"
 version = "1.0.55"
--- a/crates/lang/Cargo.toml
+++ b/crates/lang/Cargo.toml
@ -7,11 +7,13 @@ edition = "2021"

 [dependencies]
 logos = "0.14"
-chumsky = {version= "1.0.0-alpha.6", features=["label"]}
+chumsky = {version= "1.0.0-alpha.7", features=["label"]}
 petgraph = { workspace = true}
 indexmap = "2.2.6"
 clap = { version = "4", features = ["derive"] }
 ariadne = "0.4.0"
+ego-tree = "0.6.2"
+rowan = "0.15.15"

 [lints]
 workspace = true
--- a/crates/lang/src/main.rs
+++ b/crates/lang/src/main.rs
@ -1,7 +1,10 @@
 use std::{fs, path::PathBuf};

 use clap::Parser;
-use lang::{err_reporting::ErrorCollector, parser::parse};
+use lang::{
+    err_reporting::ErrorCollector,
+    parser::ast::lossless::{lex, parser},
+};

 #[derive(Parser)]
 struct Args {
@ -13,20 +16,24 @@ fn main() {
    let args = Args::parse();
    let n = args.file.clone();
    let f = fs::read_to_string(n.clone()).expect("failed to read file");
-    let mut err_collector = ErrorCollector::new(vec![(n.to_str().unwrap(), &f)]);
+    println!("toks: {:?}", lex::lex(&f));
+    let parse_res = parser::parse(&f);
+    println!("parse: {:?}", parse_res);
+    // dbg!(lex::lex(&f));
+    // let mut err_collector = ErrorCollector::new(vec![(n.to_str().unwrap(), &f)]);

-    println!("file: {f}\n");
-    let parse_res = parse(&f);
-    err_collector.insert_many(
-        args.file.to_str().unwrap(),
-        lang::err_reporting::Stage::Parse,
-        parse_res
-            .errors()
-            .into_iter()
-            .map(|e| e.to_owned())
-            .collect::<Vec<_>>(),
-    );
+    // println!("file: {f}\n");
+    // let parse_res = parse(&f);
+    // err_collector.insert_many(
+    //     args.file.to_str().unwrap(),
+    //     lang::err_reporting::Stage::Parse,
+    //     parse_res
+    //         .errors()
+    //         .into_iter()
+    //         .map(|e| e.to_owned())
+    //         .collect::<Vec<_>>(),
+    // );

-    err_collector.report_raw();
-    println!("res: {:?}", parse_res);
+    // err_collector.report_raw();
+    // println!("res: {:?}", parse_res);
 }
--- a/crates/lang/src/parser.rs
+++ b/crates/lang/src/parser.rs
@ -1,5 +1,3 @@
-use std::ops::Range;
-
 use chumsky::{
    error::Rich,
    input::{Stream, ValueInput},
@ -10,19 +8,22 @@ use chumsky::{
    IterParser,
 };
 use indexmap::IndexMap;
-use logos::{Logos, Source};
+use logos::Logos;

 use crate::tokens::Token;

 pub mod ast;
 #[cfg(test)]
 mod tests;
-use self::ast::{Expr, Expression, File};
+use self::ast::{
+    raw_ast::{RawExpr, RawExpression},
+    File,
+};

 pub type Span = SimpleSpan;
 pub type Spanned<T> = (T, Span);

-pub fn parse<'src>(src: &'src str) -> ParseResult<File<'_>, Rich<'_, Token<'_>>> {
+pub fn parse(src: &str) -> ParseResult<File<'_>, Rich<'_, Token<'_>>> {
    let toks: Vec<_> = Token::lexer(src)
        .spanned()
        .map(|(t, s)| (t.expect("TODO: add lexer error(s)"), Span::from(s)))
@ -35,22 +36,39 @@ pub(crate) fn parser<
    'src: 'tokens,
    I: ValueInput<'tokens, Token = Token<'src>, Span = Span>,
 >() -> impl Parser<'tokens, I, File<'src>, extra::Err<Rich<'tokens, Token<'src>, Span>>> {
-    let word = select! { Token::Word(word) => word };
+    let word = select! { Token::Word(word) = e => (word, e.span())};

    let expr = recursive(|expr| {
        let lit = select! {
-            Token::Int(i) = e => Expression::new(Expr::Lit(ast::Lit::Int(i.parse().unwrap())), e.span()),
-            Token::Float(f) = e => Expression::new(Expr::Lit(ast::Lit::Float(f.parse().unwrap())), e.span()),
+            Token::Int(i) = e => RawExpression::new(RawExpr::Lit(ast::Lit::Int(i.parse().expect("TODO: handle better"))), e.span()),
+            Token::Float(f) = e => RawExpression::new(RawExpr::Lit(ast::Lit::Float(f.parse().expect("TODO: handle better"))), e.span()),
+            Token::String(s) = e => RawExpression::new(RawExpr::Lit(ast::Lit::String(s.strip_prefix('"').expect("a").strip_suffix('"').expect("b"))), e.span())
        };
+        let mat = just(Token::Mat)
+            .ignore_then(select! { Token::Dimensions(dimensions) = e => (dimensions, e.span())})
+            .then(
+                lit.separated_by(just(Token::Comma))
+                    .collect::<Vec<_>>()
+                    .separated_by(just(Token::Semicolon))
+                    .collect::<Vec<_>>()
+                    .delimited_by(just(Token::BracketOpen), just(Token::BracketClose)),
+            )
+            .map_with(|(dimensions, data), e| {
+                // TODO: Validation and proper error handling/reporting
+                // (validation = validating the matrix dimensions)
+                RawExpression::new(
+                    RawExpr::Matrix(dimensions, data.into_iter().flatten().collect()),
+                    e.span(),
+                )
+            });
        let var = select! {
-            Token::VarIdent(name) => (Expr::Var as fn(_) -> _, name),
-            Token::InputIdent(name) => (Expr::InputVar as fn(_) -> _, name)
+            Token::VarIdent(name) => (RawExpr::Var as fn(_) -> _, name),
+            Token::InputIdent(name) => (RawExpr::InputVar as fn(_) -> _, name)
        }
-        .map_with(|(item_type, name), extra| Expression::new(item_type(name), extra.span()))
+        .map_with(|(item_type, name), extra| RawExpression::new(item_type(name), extra.span()))
        .labelled("variable");

        let attrset = word
-            .map_with(|n, e| (n, e.span()))
            .labelled("attr name")
            .then_ignore(just(Token::Colon))
            .then(expr)
@ -63,57 +81,72 @@ pub(crate) fn parser<
            .labelled("attrset");

        let node = word
-            .map_with(|v, e| (v, e.span()))
+            .repeated()
+            .collect()
            .then(attrset.clone().or_not())
            .map_with(|(name, params), extra| {
-                Expression::new(Expr::Node(name, params), extra.span())
+                RawExpression::new(RawExpr::Node(name, params), extra.span())
            })
-            .or(var)
-            .or(attrset
-                .map_with(|attrset, extra| Expression::new(Expr::AttrSet(attrset), extra.span())))
+            // .or(var)
+            // .or(attrset
+            //     .map_with(|attrset, extra| Expression::new(Expr::AttrSet(attrset), extra.span())))
+            // .or(lit)
+            // .or(mat)
            .labelled("node");

+        let atom = var
+            .or(lit)
+            .or(mat)
+            .or(attrset.map_with(|attrset, extra| {
+                RawExpression::new(RawExpr::AttrSet(attrset), extra.span())
+            }))
+            .or(node.clone());
+
        #[allow(clippy::let_and_return)]
-        let pipeline = node
+        let pipeline = atom
            .clone()
            .then(choice((
-                just(Token::Pipe).to(Expr::SimplePipe as fn(_, _) -> _),
-                just(Token::MappingPipe).to(Expr::MappingPipe as fn(_, _) -> _),
-                just(Token::NullPipe).to(Expr::NullPipe as fn(_, _) -> _),
+                just(Token::Pipe).to(RawExpr::SimplePipe as fn(_, _) -> _),
+                just(Token::MappingPipe).to(RawExpr::MappingPipe as fn(_, _) -> _),
+                just(Token::NullPipe).to(RawExpr::NullPipe as fn(_, _) -> _),
            )))
            .repeated()
-            .foldr_with(node, |(curr, pipe), next, extra| {
-                Expression::new(pipe(Box::new(curr), Box::new(next)), extra.span())
+            .foldr_with(atom, |(curr, pipe), next, extra| {
+                RawExpression::new(pipe(curr, next), extra.span())
            });

        pipeline
    });

-    let decl = just(Token::Def).ignore_then(
-        word.map_with(|n, e| (n, e.span()))
-            .then_ignore(just(Token::Equals))
+    let decls = just(Token::Def)
+        .ignore_then(
+            word.then_ignore(just(Token::Equals))
                .then(expr.clone().map(|expr| expr))
-            .then_ignore(just(Token::SemiColon)),
-    );
-
-    expr.map(|expr| File {
-        decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]),
-    })
-    .or(decl.repeated().collect::<Vec<_>>().map(|decls| File {
+                .then_ignore(just(Token::Semicolon)),
+        )
+        .repeated()
+        .collect::<Vec<_>>()
+        .map(|decls| File {
            decls: IndexMap::from_iter(decls),
-    }))
+        });
+
+    let single_expr = expr.map(|expr| File {
+        decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]),
+    });
+
+    just(Token::Def).rewind().ignore_then(decls).or(single_expr)
+    // single_expr.or(decls)
+
+    // expr.map(|expr| File {
+    //     decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]),
+    // })
+    // .or(decl.repeated().collect::<Vec<_>>().map(|decls| File {
+    //     decls: IndexMap::from_iter(decls),
+    // }))
 }

 pub mod asg {
    use petgraph::graph::DiGraph;

    use super::Spanned;
-
-    pub struct Asg<'src> {
-        graph: DiGraph<AsgNode<'src>, String>,
-    }
-
-    enum AsgNode<'src> {
-        Node(Spanned<&'src str>),
-    }
 }
--- a/crates/lang/src/parser/ast.rs
+++ b/crates/lang/src/parser/ast.rs
@ -2,51 +2,14 @@ use std::collections::{BTreeMap, HashMap};

 use indexmap::IndexMap;

-use super::{Span, Spanned};
+use super::Spanned;

 #[derive(Debug, PartialEq)]
 pub struct File<'src> {
-    pub decls: IndexMap<Spanned<&'src str>, Expression<'src>>,
+    pub decls: IndexMap<Spanned<&'src str>, raw_ast::RawExpression<'src>>,
 }

-#[derive(Debug, PartialEq)]
-pub struct Expression<'src> {
-    pub expr: Expr<'src>,
-    pub span: Span,
-}
-
-impl<'src> Expression<'src> {
-    pub fn new(expr: Expr<'src>, span: Span) -> Self {
-        Self { expr, span }
-    }
-}
-
-#[derive(Debug, PartialEq)]
-pub enum Expr<'src> {
-    Node(
-        Spanned<&'src str>,
-        Option<Spanned<IndexMap<Spanned<&'src str>, Expression<'src>>>>,
-    ),
-    SimplePipe(Box<Expression<'src>>, Box<Expression<'src>>),
-    // NamingPipe(
-    //     Box<Expression<'src>>,
-    //     (Vec<Spanned<&'src str>>, Vec<Spanned<&'src str>>),
-    //     Box<Expression<'src>>,
-    // ),
-    MappingPipe(Box<Expression<'src>>, Box<Expression<'src>>),
-    NullPipe(Box<Expression<'src>>, Box<Expression<'src>>),
-    MultiPipe(IndexMap<Spanned<&'src str>, Expression<'src>>),
-    // LetIn(
-    //     IndexMap<Spanned<&'src str>, Box<Expression<'src>>>,
-    //     Box<Expression<'src>>,
-    // ),
-    // $
-    Var(&'src str),
-    // @
-    InputVar(&'src str),
-    AttrSet(Spanned<IndexMap<Spanned<&'src str>, Expression<'src>>>),
-    Lit(Lit<'src>),
-}
+pub mod raw_ast;

 #[derive(Debug, PartialEq)]
 pub enum Lit<'src> {
@ -55,3 +18,7 @@ pub enum Lit<'src> {
    Float(f64),
    String(&'src str),
 }
+
+pub mod lossless;
+
+pub mod ast_tree;
--- a/crates/lang/src/parser/ast/ast_tree.rs
+++ b/crates/lang/src/parser/ast/ast_tree.rs
@ -0,0 +1,31 @@
+use ego_tree::Tree;
+
+use crate::parser::Spanned;
+
+use super::{File, Lit};
+
+/// Tree-shaped AST backed by an `ego_tree::Tree` whose nodes are [`AstNode`]s.
+///
+/// `'src` is the lifetime of the string slices stored in the nodes
+/// (presumably borrowed from the source text — TODO confirm once this type is constructed).
+pub struct Ast<'src> {
+    // Underlying tree storage; every node carries one `AstNode`.
+    tree: Tree<AstNode<'src>>,
+}
+
+/// Payload of a single node in [`Ast`]; currently only records what kind of node it is.
+struct AstNode<'src> {
+    // What this node represents, plus any inline data (see `NodeKind`).
+    kind: NodeKind<'src>,
+}
+
+/// The kind of an [`AstNode`].
+///
+/// Variants without a payload carry no inline data; presumably their content is
+/// expressed through child nodes in the tree — TODO confirm once the tree is built.
+enum NodeKind<'src> {
+    Decl,
+    // Identifier text.
+    Ident(&'src str),
+    Instr,
+    Expr,
+    MappingPipe,
+    NullPipe,
+    MultiPipe,
+    // Variable name.
+    Var(&'src str),
+    // Input variable name.
+    InputVar(&'src str),
+    AttrSet,
+    Attr,
+    // Literal value.
+    Lit(Lit<'src>),
+    Matrix,
+    // Two `u16` matrix dimensions.
+    // NOTE(review): which one is rows and which is columns is not fixed here — confirm.
+    Dimensions(u16, u16),
+    MatrixRow,
+}
--- a/crates/lang/src/parser/ast/lossless.rs
+++ b/crates/lang/src/parser/ast/lossless.rs
@ -0,0 +1,19 @@
+use self::lex::SyntaxKind;
+
+pub mod parser;
+
+pub mod lex;
+
+/// Uninhabited marker type that ties rowan's generic tree types to [`SyntaxKind`]
+/// through its `rowan::Language` implementation.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+enum Lang {}
+// Conversion between rowan's untyped raw kinds (`u16` newtype) and `SyntaxKind`.
+impl rowan::Language for Lang {
+    type Kind = SyntaxKind;
+    // `unsafe` is needed only for the transmute below.
+    #[allow(unsafe_code)]
+    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
+        // Panics on raw values past the last variant instead of reinterpreting them.
+        assert!(raw.0 <= SyntaxKind::ROOT as u16);
+        // SAFETY: `SyntaxKind` is `#[repr(u16)]`, its discriminants start at 0 and
+        // increase by one per variant, and `ROOT` is the last variant. The assert above
+        // therefore guarantees `raw.0` is a valid discriminant. This relies on `ROOT`
+        // staying the final variant of the enum.
+        unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
+    }
+    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
+        // Delegates to `From<SyntaxKind> for rowan::SyntaxKind`.
+        kind.into()
+    }
+}
--- a/crates/lang/src/parser/ast/lossless/lex.rs
+++ b/crates/lang/src/parser/ast/lossless/lex.rs
@ -0,0 +1,118 @@
+use logos::Logos;
+
+use crate::parser::Span;
+
+/// Tokenizes `src` into `(kind, text)` pairs.
+///
+/// Input the lexer cannot match is reported as [`SyntaxKind::LEX_ERR`] rather than
+/// aborting. The returned vector is in *reverse* source order (the first token of
+/// `src` is the last element), so a consumer can read tokens in source order by
+/// popping from the end.
+pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
+    let mut tokens: Vec<_> = SyntaxKind::lexer(src)
+        .spanned()
+        .map(|(res, range)| (res.unwrap_or(SyntaxKind::LEX_ERR), &src[range]))
+        .collect();
+
+    tokens.reverse();
+    tokens
+}
+
+/// Every kind of token and node that can appear in the lossless syntax tree.
+///
+/// Variants carrying a `#[token]`/`#[regex]` attribute are produced by the lexer;
+/// variants without one are never produced by the lexer and exist for tree nodes
+/// and error markers.
+///
+/// The enum is `#[repr(u16)]` with contiguous discriminants starting at 0, and
+/// `ROOT` must remain the LAST variant: `Lang::kind_from_raw` uses it as the upper
+/// bound when converting raw `u16` values back into a `SyntaxKind`.
+#[derive(Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
+#[repr(u16)]
+#[allow(non_camel_case_types)]
+pub enum SyntaxKind {
+    // --- keywords ---
+    #[token("def")]
+    DEF_KW = 0,
+    #[token("let")]
+    LET_KW,
+    #[token("in")]
+    IN_KW,
+    #[token("mat")]
+    MAT_KW,
+
+    // --- literals ---
+    /// Two digit runs joined by `x`, e.g. `2x3`.
+    #[regex("[\\d]+x[\\d]+")]
+    PAT_DIMENSIONS,
+    #[regex("[\\d]+")]
+    INT_NUM,
+    /// Optionally signed decimal with digits on at least one side of the `.`.
+    #[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)")]
+    FLOAT_NUM,
+    /// Double-quoted string. A backslash must be followed by one of `"\bnfrt`
+    /// or by `u` and four hex digits.
+    // The `u....` alternative sits inside the backslash group; outside of it the
+    // alternative was redundant and `\uXXXX` escapes could never be matched.
+    #[regex(r#""([^"\\]|\\(["\\bnfrt]|u[a-fA-F0-9]{4}))*""#)]
+    STRING,
+
+    // --- node kinds (no lexer pattern) ---
+    MATRIX,
+    DECL,
+    LIST,
+    MAT_BODY,
+    PARENTHESIZED_EXPR,
+    EXPR,
+
+    // --- delimiters ---
+    #[token("(")]
+    L_PAREN,
+    #[token(")")]
+    R_PAREN,
+    #[token("{")]
+    L_CURLY,
+    #[token("}")]
+    R_CURLY,
+    #[token("[")]
+    L_BRACK,
+    #[token("]")]
+    R_BRACK,
+    #[token("<")]
+    L_ANGLE,
+    #[token(">")]
+    R_ANGLE,
+
+    // --- operators ---
+    #[token("+")]
+    PLUS,
+    #[token("-")]
+    MINUS,
+    #[token("*")]
+    STAR,
+    #[token("/")]
+    SLASH,
+    #[token("%")]
+    PERCENT,
+    #[token("^")]
+    CARET,
+
+    // --- node kinds (no lexer pattern) ---
+    INSTR,
+    INSTR_NAME,
+    INSTR_PARAMS,
+    ATTR_SET,
+    ATTR,
+    ATTR_NAME,
+    ATTR_VALUE,
+
+    // --- names ---
+    /// Starts with a letter or `_`; may continue with letters, `_`, `-` and digits.
+    #[regex("[a-zA-Z_]+[a-zA-Z_\\-\\d]*")]
+    IDENT,
+    /// `$` followed by at least one of letters, digits, `_`, `-`.
+    #[regex("\\$[a-zA-Z0-9_\\-]+")]
+    VAR,
+    /// `@` followed by at least one of letters, digits, `_`, `-`.
+    #[regex("\\@[a-zA-Z0-9_\\-]+")]
+    INPUT_VAR,
+
+    // --- punctuation ---
+    #[token("$")]
+    DOLLAR,
+    #[token("@")]
+    AT,
+    #[token(",")]
+    COMMA,
+    #[token("|")]
+    PIPE,
+    #[token("@|")]
+    MAPPING_PIPE,
+    #[token("!|")]
+    NULL_PIPE,
+    #[token("=")]
+    EQ,
+    #[token(":")]
+    COLON,
+    #[token(";")]
+    SEMICOLON,
+    #[token(".")]
+    DOT,
+    #[token("!")]
+    BANG,
+
+    // --- trivia ---
+    /// Horizontal whitespace only (space, tab, form feed); newlines are separate.
+    #[regex("[ \\t\\f]+")]
+    WHITESPACE,
+    #[token("\n")]
+    NEWLINE,
+
+    // --- error markers and root (no lexer pattern) ---
+    /// Marks tokens/nodes the parser rejected.
+    PARSE_ERR,
+    /// Substituted by `lex` for input the lexer could not match.
+    LEX_ERR,
+    /// Kind of the tree's root node. Must stay the last variant (see type docs).
+    ROOT,
+}
+/// Converts a [`SyntaxKind`] into rowan's raw kind using its `u16` discriminant.
+impl From<SyntaxKind> for rowan::SyntaxKind {
+    fn from(kind: SyntaxKind) -> Self {
+        let raw = kind as u16;
+        rowan::SyntaxKind(raw)
+    }
+}
--- a/crates/lang/src/parser/ast/lossless/parser.rs
+++ b/crates/lang/src/parser/ast/lossless/parser.rs
@ -0,0 +1,437 @@
+use std::borrow::Borrow;
+
+use chumsky::container::Container;
+use rowan::{
+    Checkpoint, GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken,
+};
+
+use crate::parser::{
+    ast::lossless::{lex::SyntaxKind::*, Lang},
+    Span,
+};
+
+use super::lex::{self, SyntaxKind};
+
+/// Output of [`parse`]: the finished green tree.
+#[derive(PartialEq, Eq)]
+pub struct Parse {
+    /// Root green node as returned by `GreenNodeBuilder::finish`.
+    pub green_node: GreenNode,
+}
+
+impl std::fmt::Debug for Parse {
+    /// Prints the whole green tree as an indented listing, starting at level 0.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let root: &GreenNodeData = self.green_node.borrow();
+        debug_print_green_node(NodeOrToken::Node(root), f, 0)
+    }
+}
+
+/// Recursively writes `node` to `f`, indented by four spaces per `lvl`.
+///
+/// Nodes are printed as `KIND {` … `}` with their children one level deeper;
+/// tokens are printed as `KIND "text";`. Any formatter error is propagated.
+fn debug_print_green_node(
+    node: NodeOrToken<&GreenNodeData, &GreenTokenData>,
+    f: &mut std::fmt::Formatter<'_>,
+    lvl: i32,
+) -> std::fmt::Result {
+    for _ in 0..lvl {
+        f.write_str("    ")?;
+    }
+
+    match node {
+        NodeOrToken::Node(n) => {
+            // The result of this write used to be discarded; propagate it like the others.
+            writeln!(f, "{:?} {{", Lang::kind_from_raw(n.kind()))?;
+            for c in n.children() {
+                debug_print_green_node(c, f, lvl + 1)?;
+            }
+            // Closing brace sits at the same indentation as the opening line.
+            for _ in 0..lvl {
+                f.write_str("    ")?;
+            }
+            f.write_str("}\n")
+        }
+        NodeOrToken::Token(t) => {
+            writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text())
+        }
+    }
+}
+
+/// Hand-written parser state that builds a rowan green tree from lexed tokens.
+#[derive(Debug)]
+struct Parser<'src> {
+    // Remaining tokens in reverse source order (as returned by `lex::lex`):
+    // the next token to process is the LAST element and is consumed with `pop`.
+    tokens: Vec<(SyntaxKind, &'src str)>,
+    // Tree builder that receives every consumed token.
+    builder: GreenNodeBuilder<'src>,
+    // Syntax errors recorded so far.
+    errors: Vec<SyntaxError>,
+}
+
+/// Errors recorded by the parser while it keeps building the tree.
+#[derive(Debug, PartialEq, Eq)]
+enum SyntaxError {
+    /// A token of the given kind was expected at this position.
+    Expected(SyntaxKind),
+    /// An attribute had no value expression after its name and colon.
+    AttrExpectedValue,
+    /// guessed if there's a newline and attr on next line without comma
+    /// should then suggest comma after attr
+    ExpectedCommaBetweenAttrs,
+}
+
+/// Lexes `src` and parses the resulting tokens into a lossless syntax tree.
+///
+/// NOTE(review): syntax errors collected during parsing live in the internal
+/// parser state and are not part of the returned [`Parse`]; callers currently
+/// only see them as `PARSE_ERR` tokens/nodes inside the tree.
+pub fn parse(src: &str) -> Parse {
+    Parser {
+        tokens: lex::lex(src),
+        builder: GreenNodeBuilder::new(),
+        errors: Vec::new(),
+    }
+    .parse()
+}
+
+impl Parser<'_> {
+    /// Consumes the parser and builds the tree: a `ROOT` node wrapping one expression.
+    fn parse(mut self) -> Parse {
+        self.start_node(ROOT);
+
+        match self.expr(None) {
+            expr::ExprRes::Ok | expr::ExprRes::Eof => (),
+            // Input that does not start with an expression is not handled yet.
+            expr::ExprRes::NoExpr => todo!(),
+        }
+
+        self.finish_node();
+        Parse {
+            green_node: self.builder.finish(),
+        }
+    }
+
+    /// Opens a new node of `kind` at the current position.
+    fn start_node(&mut self, kind: SyntaxKind) {
+        self.builder.start_node(kind.into());
+    }
+    /// Closes the most recently opened node.
+    fn finish_node(&mut self) {
+        self.builder.finish_node();
+    }
+
+    /// Advance one token, adding it to the current branch of the tree builder.
+    ///
+    /// Panics if no tokens are left; callers check `current()` first.
+    fn bump(&mut self) {
+        let (kind, text) = self.tokens.pop().expect("bump called with no tokens left");
+        self.builder.token(kind.into(), text);
+    }
+    /// Consumes the current token as a `PARSE_ERR` token and records `err`.
+    ///
+    /// Panics if no tokens are left.
+    fn syntax_err(&mut self, err: SyntaxError) {
+        let (_, text) = self
+            .tokens
+            .pop()
+            .expect("syntax_err called with no tokens left");
+        self.builder.token(PARSE_ERR.into(), text);
+        self.errors.push(err);
+    }
+    /// Wraps everything added since `checkpoint` in a `PARSE_ERR` node and records `err`.
+    /// Does not consume any token.
+    fn syntax_err_by_checkpoint(&mut self, checkpoint: Checkpoint, err: SyntaxError) {
+        self.builder.start_node_at(checkpoint, PARSE_ERR.into());
+        self.finish_node();
+        self.errors.push(err);
+    }
+    /// Shorthand for `syntax_err(SyntaxError::Expected(expected))`.
+    fn expected(&mut self, expected: SyntaxKind) {
+        self.syntax_err(SyntaxError::Expected(expected))
+    }
+    /// Peek at the first unprocessed token
+    fn current(&self) -> Option<SyntaxKind> {
+        self.tokens.last().map(|(kind, _)| *kind)
+    }
+    /// Peek at the token after the current one; `None` if fewer than two tokens remain.
+    fn next(&self) -> Option<SyntaxKind> {
+        // `len() - 2` would underflow (and panic in debug builds) with 0 or 1 tokens left.
+        let idx = self.tokens.len().checked_sub(2)?;
+        self.tokens.get(idx).map(|(kind, _)| *kind)
+    }
+    /// Consumes all leading whitespace and newline tokens.
+    fn skip_ws(&mut self) {
+        while matches!(self.current(), Some(WHITESPACE | NEWLINE)) {
+            self.bump()
+        }
+    }
+    /// Consumes leading whitespace tokens but stops at a newline.
+    fn skip_ws_without_newlines(&mut self) {
+        while self.current() == Some(WHITESPACE) {
+            self.bump()
+        }
+    }
+}
+
+mod expr {
+    use rowan::Checkpoint;
+
+    use super::{attrset::AttrsetRes, instr::NodeRes, Parser};
+    use crate::parser::{ast::lossless::lex::SyntaxKind::*, Span};
+    impl Parser<'_> {
+        /// Parses one expression.
+        ///
+        /// Leading whitespace/newlines are consumed first. If `start` is `None`, the
+        /// expression node begins after that whitespace; otherwise it begins at `start`.
+        /// An identifier starts an instruction, which is wrapped in an `EXPR` node;
+        /// anything else is handed to [`Parser::atom`].
+        pub(super) fn expr(&mut self, start: Option<Checkpoint>) -> ExprRes {
+            self.skip_ws();
+            let start = start.unwrap_or_else(|| self.builder.checkpoint());
+            match self.current() {
+                Some(IDENT) => {
+                    let expr_res = match self.instr() {
+                        NodeRes::Ok => ExprRes::Ok,
+                        NodeRes::Eof => ExprRes::Eof,
+                    };
+                    self.builder.start_node_at(start, EXPR.into());
+                    self.finish_node();
+                    expr_res
+                }
+                Some(_) => self.atom(Some(start)),
+                None => ExprRes::Eof,
+            }
+        }
+
+        /// Parses a literal, an attribute set or a parenthesized expression.
+        ///
+        /// Returns `NoExpr` without consuming the current token when it cannot start
+        /// an atom.
+        pub(super) fn atom(&mut self, start: Option<Checkpoint>) -> ExprRes {
+            self.skip_ws();
+            let start = start.unwrap_or_else(|| self.builder.checkpoint());
+            match self.current() {
+                Some(INT_NUM | FLOAT_NUM | STRING) => {
+                    self.bump();
+                    self.builder.start_node_at(start, EXPR.into());
+                    self.finish_node();
+                    ExprRes::Ok
+                }
+                Some(L_CURLY) => match self.attrset(start) {
+                    AttrsetRes::Ok => ExprRes::Ok,
+                    AttrsetRes::Eof => ExprRes::Eof,
+                },
+                Some(L_PAREN) => {
+                    self.builder.start_node_at(start, PARENTHESIZED_EXPR.into());
+                    self.bump();
+                    self.expr(None);
+                    self.skip_ws();
+                    let res = match self.current() {
+                        Some(R_PAREN) => {
+                            // The closing paren belongs to the parenthesized node.
+                            self.bump();
+                            ExprRes::Ok
+                        }
+                        Some(_) => todo!(),
+                        None => ExprRes::Eof,
+                    };
+                    // Every started node must be finished, even at end of input,
+                    // otherwise the builder's node stack is left unbalanced.
+                    self.finish_node();
+                    res
+                }
+                Some(_) => ExprRes::NoExpr,
+                None => ExprRes::Eof,
+            }
+        }
+    }
+
+    /// Outcome of trying to parse an expression.
+    pub enum ExprRes {
+        /// An expression was parsed.
+        Ok,
+        /// The token stream ended.
+        Eof,
+        /// The current token does not start an expression.
+        NoExpr,
+    }
+}
+
+mod attrset {
+    use chumsky::container::Container;
+    use rowan::Checkpoint;
+
+    use super::{expr::ExprRes, instr::NodeRes, Parser};
+    use crate::parser::{
+        ast::lossless::{lex::SyntaxKind::*, parser::SyntaxError},
+        Span,
+    };
+    impl Parser<'_> {
+        /// Parses `{ name: value, ... }` into an `ATTR_SET` node starting at `checkpoint`.
+        ///
+        /// Must be called with the current token being `L_CURLY` (asserted).
+        /// If the input ends directly after the opening brace, no node is created.
+        pub(super) fn attrset(&mut self, checkpoint: Checkpoint) -> AttrsetRes {
+            assert_eq!(self.current(), Some(L_CURLY));
+            self.bump();
+            self.skip_ws();
+
+            let Some(first) = self.current() else {
+                return AttrsetRes::Eof;
+            };
+
+            self.builder.start_node_at(checkpoint, ATTR_SET.into());
+            let res = if first == R_CURLY {
+                // Empty attribute set.
+                self.bump();
+                AttrsetRes::Ok
+            } else {
+                match self.attrs() {
+                    AttrRes::Eof => AttrsetRes::Eof,
+                    AttrRes::RCurly | AttrRes::Ok => AttrsetRes::Ok,
+                }
+            };
+            self.finish_node();
+            res
+        }
+
+        /// Parses attributes until the closing brace, an early `}` inside an
+        /// attribute, or the end of input.
+        fn attrs(&mut self) -> AttrRes {
+            loop {
+                match self.attr() {
+                    AttrRes::Ok => {}
+                    res @ (AttrRes::Eof | AttrRes::RCurly) => return res,
+                }
+
+                self.skip_ws_without_newlines();
+                // Marks the end of the previous attribute so that trailing trivia can be
+                // wrapped in an error node if a comma turns out to be missing.
+                let after_attr = self.builder.checkpoint();
+
+                if self.current() == Some(NEWLINE) {
+                    self.skip_ws();
+                    if self.current() == Some(IDENT) {
+                        // A new attribute on the next line without a separating comma.
+                        self.syntax_err_by_checkpoint(
+                            after_attr,
+                            SyntaxError::ExpectedCommaBetweenAttrs,
+                        );
+                        continue;
+                    }
+                }
+
+                match self.current() {
+                    Some(R_CURLY) => {
+                        self.bump();
+                        return AttrRes::Ok;
+                    }
+                    // Either the separating comma or an unexpected token; consume it
+                    // and carry on with the next attribute.
+                    Some(_) => self.bump(),
+                    None => return AttrRes::Eof,
+                }
+            }
+        }
+
+        /// Parses a single `name: value` attribute into an `ATTR` node.
+        ///
+        /// Returns `Ok` without consuming anything but whitespace when the current
+        /// token is the closing brace, so the caller can handle it (trailing comma).
+        /// Every node started here is finished on every return path.
+        fn attr(&mut self) -> AttrRes {
+            self.skip_ws();
+            // Decide before opening any node, so early returns leave the builder balanced.
+            match self.current() {
+                Some(R_CURLY) => return AttrRes::Ok,
+                None => return AttrRes::Eof,
+                Some(_) => {}
+            }
+
+            self.start_node(ATTR);
+
+            self.start_node(ATTR_NAME);
+            match self.current() {
+                Some(IDENT) => self.bump(),
+                _ => self.expected(IDENT),
+            }
+            self.finish_node();
+
+            self.skip_ws();
+            match self.current() {
+                Some(COLON) => self.bump(),
+                Some(R_CURLY) => {
+                    self.expected(COLON);
+                    self.finish_node();
+                    return AttrRes::RCurly;
+                }
+                Some(_) => self.expected(COLON),
+                None => {
+                    self.finish_node();
+                    return AttrRes::Eof;
+                }
+            }
+
+            self.skip_ws();
+            self.start_node(ATTR_VALUE);
+            let res = match self.expr(None) {
+                // The value is complete; the token after it (comma, brace, ...) is left
+                // for `attrs` to handle instead of being swallowed into the value.
+                ExprRes::Ok => AttrRes::Ok,
+                ExprRes::Eof => AttrRes::Eof,
+                ExprRes::NoExpr => match self.current() {
+                    Some(COMMA) => {
+                        self.syntax_err(SyntaxError::AttrExpectedValue);
+                        AttrRes::Ok
+                    }
+                    Some(R_CURLY) => {
+                        self.syntax_err(SyntaxError::AttrExpectedValue);
+                        AttrRes::RCurly
+                    }
+                    Some(_) => {
+                        self.expected(EXPR);
+                        AttrRes::Ok
+                    }
+                    // `NoExpr` is only returned while a current token exists.
+                    None => unreachable!(),
+                },
+            };
+            self.finish_node();
+            self.finish_node();
+            res
+        }
+    }
+
+    /// Outcome of parsing a whole attribute set.
+    #[derive(PartialEq, Eq)]
+    pub enum AttrsetRes {
+        Ok,
+        /// The token stream ended before the set was closed.
+        Eof,
+    }
+
+    /// Outcome of parsing one attribute or the attribute list.
+    #[derive(PartialEq, Eq)]
+    enum AttrRes {
+        Ok,
+        /// The token stream ended.
+        Eof,
+        /// A closing brace was hit in the middle of an attribute and consumed as an error.
+        RCurly,
+    }
+}
+
+mod instr {
+    use super::Parser;
+    use crate::parser::{
+        ast::lossless::{lex::SyntaxKind::*, parser::expr::ExprRes},
+        Span,
+    };
+
+    impl Parser<'_> {
+        /// Parses an instruction: a name followed by any number of parameter
+        /// expressions, all wrapped in an `INSTR` node.
+        ///
+        /// Must be called with the current token being `IDENT` (asserted).
+        /// Parameters are wrapped in `INSTR_PARAMS` only if at least one was parsed.
+        pub(super) fn instr(&mut self) -> NodeRes {
+            assert_eq!(self.current(), Some(IDENT));
+            self.skip_ws();
+            self.start_node(INSTR);
+            self.instr_name();
+
+            let params_checkpoint = self.builder.checkpoint();
+            // Keep parsing positional parameters for as long as expressions follow.
+            let mut has_params = false;
+            while let ExprRes::Ok = self.expr(None) {
+                has_params = true;
+            }
+
+            if has_params {
+                self.builder
+                    .start_node_at(params_checkpoint, INSTR_PARAMS.into());
+                self.finish_node();
+            }
+            self.finish_node();
+            NodeRes::Ok
+        }
+
+        /// Collects consecutive identifiers (separated by same-line whitespace)
+        /// into an `INSTR_NAME` node.
+        fn instr_name(&mut self) {
+            self.start_node(INSTR_NAME);
+            loop {
+                if self.current() != Some(IDENT) {
+                    break;
+                }
+                self.bump();
+                self.skip_ws_without_newlines();
+            }
+            self.finish_node();
+        }
+    }
+
+    /// Outcome of parsing an instruction.
+    pub(super) enum NodeRes {
+        Ok,
+        Eof,
+    }
+}
--- a/crates/lang/src/parser/ast/raw_ast.rs
+++ b/crates/lang/src/parser/ast/raw_ast.rs
@ -0,0 +1,50 @@
+use indexmap::IndexMap;
+
+use super::super::Spanned;
+
+use super::super::Span;
+use super::Lit;
+
+/// A [`RawExpr`] paired with a [`Span`].
+///
+/// The expression is boxed because `RawExpr` variants themselves hold `RawExpression`
+/// values, so the type would otherwise be infinitely sized.
+#[derive(Debug, PartialEq)]
+pub struct RawExpression<'src> {
+    /// The expression itself, heap-allocated.
+    pub expr: Box<RawExpr<'src>>,
+    /// NOTE(review): presumably the source range `expr` was parsed from — confirm.
+    pub span: Span,
+}
+
+impl<'src> RawExpression<'src> {
+    /// Builds a `RawExpression`, moving `expr` into a `Box` and storing `span` next to it.
+    pub fn new(expr: RawExpr<'src>, span: Span) -> Self {
+        let expr = Box::new(expr);
+        Self { expr, span }
+    }
+}
+
+/// Raw expression tree; nested expressions are stored as spanned [`RawExpression`]s.
+///
+/// NOTE(review): the variant descriptions below are inferred from the payload types and
+/// names only; the code that constructs them is not part of this file — confirm.
+#[derive(Debug, PartialEq)]
+pub enum RawExpr<'src> {
+    /// A list of spanned words plus an optional spanned, ordered map from spanned names to
+    /// expressions; presumably a node's name and its named arguments.
+    Node(
+        Vec<Spanned<&'src str>>,
+        Option<Spanned<IndexMap<Spanned<&'src str>, RawExpression<'src>>>>,
+    ),
+    /// Two expressions; presumably the left and right side of a plain pipe.
+    SimplePipe(RawExpression<'src>, RawExpression<'src>),
+    // NamingPipe(
+    //     Box<Expression<'src>>,
+    //     (Vec<Spanned<&'src str>>, Vec<Spanned<&'src str>>),
+    //     Box<Expression<'src>>,
+    // ),
+    /// Two expressions; presumably the left and right side of a mapping pipe.
+    MappingPipe(RawExpression<'src>, RawExpression<'src>),
+    /// Two expressions; presumably the left and right side of a null pipe.
+    NullPipe(RawExpression<'src>, RawExpression<'src>),
+    /// Ordered map from spanned names to expressions.
+    MultiPipe(IndexMap<Spanned<&'src str>, RawExpression<'src>>),
+    // LetIn(
+    //     IndexMap<Spanned<&'src str>, Box<Expression<'src>>>,
+    //     Box<Expression<'src>>,
+    // ),
+    /// Variable reference, written with a `$` sigil in source.
+    Var(&'src str),
+    /// Input variable reference, written with an `@` sigil in source.
+    InputVar(&'src str),
+    /// Spanned, ordered map from spanned attribute names to their value expressions.
+    AttrSet(Spanned<IndexMap<Spanned<&'src str>, RawExpression<'src>>>),
+    /// A literal value.
+    Lit(Lit<'src>),
+    /// A spanned `(u16, u16)` pair followed by a flat list of expressions; presumably the
+    /// matrix dimensions and its cells.
+    Matrix(Spanned<(u16, u16)>, Vec<RawExpression<'src>>),
+    /// A sequence of expressions.
+    List(Vec<RawExpression<'src>>),
+}
--- a/crates/lang/src/parser/tests.rs
+++ b/crates/lang/src/parser/tests.rs
@ -1,4 +1,4 @@
-use crate::parser::ast::{Expr, File};
+use crate::parser::ast::File;
 use crate::parser::parse;
 use crate::tokens::Token;
 use chumsky::input::Stream;
--- a/crates/lang/src/tokens.rs
+++ b/crates/lang/src/tokens.rs
@ -14,6 +14,14 @@ pub enum Token<'a> {
    Let,
    #[token("in")]
    In,
+    #[token("mat")]
+    Mat,
+    #[regex("[\\d]+x[\\d]+", |lex| {
+        let (x, y) = lex.slice().split_once('x').expect("regex guarantees an 'x' separator");
+        // Values that do not fit in a u16 give `None`, which logos reports as a lex error, not a panic.
+        x.parse::<u16>().ok().zip(y.parse::<u16>().ok())
+    })]
+    Dimensions((u16, u16)),
    #[regex("[\\d]+", |lex| lex.slice())]
    Int(&'a str),
    #[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)", |lex| lex.slice())]
@ -30,12 +38,11 @@ pub enum Token<'a> {
    Mult,
    #[token("/")]
    Div,
-    #[regex("[a-zA-Z_]+[a-zA-Z0-9_\\-]*", |lex| lex.slice())]
+    // TODO: figure out how to allow numbers in words?
+    #[regex("[a-zA-Z_]+[a-zA-Z_\\-\\d]*", |lex| lex.slice().trim())]
    Word(&'a str),
    #[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
    VarIdent(&'a str),
-    #[token("@..")]
-    InputSpread,
    #[regex("\\@[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
    InputIdent(&'a str),
    #[token(",")]
@ -55,7 +62,7 @@ pub enum Token<'a> {
    #[token(":")]
    Colon,
    #[token(";")]
-    SemiColon,
+    Semicolon,
    #[token("[")]
    BracketOpen,
    #[token("]")]
--- a/crates/lang/src/tokens/tests.rs
+++ b/crates/lang/src/tokens/tests.rs
@ -34,7 +34,7 @@ lexer_test! {

 lexer_test! {
    test_lex_subgroup,
-    "subgroup(first, second) = a | b { in1: $first } | c { in1: $second }",
+    "subgroup(first, second) = a | b [ $first ] | c [ $second ]",
    [
        Token::Word("subgroup"),
        Token::ParenOpen,
@ -46,18 +46,14 @@ lexer_test! {
        Token::Word("a"),
        Token::Pipe,
        Token::Word("b"),
-        Token::BraceOpen,
-        Token::Word("in1"),
-        Token::Colon,
+        Token::BracketOpen,
        Token::VarIdent("first"),
-        Token::BraceClose,
+        Token::BracketClose,
        Token::Pipe,
        Token::Word("c"),
-        Token::BraceOpen,
-        Token::Word("in1"),
-        Token::Colon,
+        Token::BracketOpen,
        Token::VarIdent("second"),
-        Token::BraceClose
+        Token::BracketClose
    ]
 }

--- a/flake.lock
+++ b/flake.lock
@ -33,11 +33,11 @@
        "pre-commit-hooks": "pre-commit-hooks_2"
      },
      "locked": {
-        "lastModified": 1712579011,
-        "narHash": "sha256-trHgFNW8CW85c1OuAPBI+OGous53KkVhMemvcq7syDo=",
+        "lastModified": 1712724616,
+        "narHash": "sha256-qs9uEbrOpp6oXcDOp5cpilyU52t78ZpEPATtaHRVLIU=",
        "owner": "cachix",
        "repo": "devenv",
-        "rev": "a71323c618664a6b7a39bc183b0ce22ac8511cf9",
+        "rev": "d1a11d14dbe96a03c7f9068e4d3af05f283734e0",
        "type": "github"
      },
      "original": {
@ -83,11 +83,11 @@
        "rust-analyzer-src": "rust-analyzer-src"
      },
      "locked": {
-        "lastModified": 1712384501,
-        "narHash": "sha256-AZmYmEnc1ZkSlxUJVUtGh9VFAqWPr+xtNIiBqD2eKfc=",
+        "lastModified": 1712730246,
+        "narHash": "sha256-iB8bFj+07RHpmt+XuGGvYQk2Iwm12u6+DklGq/+Tg5s=",
        "owner": "nix-community",
        "repo": "fenix",
-        "rev": "99c6241db5ca5363c05c8f4acbdf3a4e8fc42844",
+        "rev": "d402ae4a5e5676722290470f61a5e8e3155b5487",
        "type": "github"
      },
      "original": {
@ -447,11 +447,11 @@
    },
    "nixpkgs_3": {
      "locked": {
-        "lastModified": 1712163089,
-        "narHash": "sha256-Um+8kTIrC19vD4/lUCN9/cU9kcOsD1O1m+axJqQPyMM=",
+        "lastModified": 1712608508,
+        "narHash": "sha256-vMZ5603yU0wxgyQeHJryOI+O61yrX2AHwY6LOFyV1gM=",
        "owner": "nixos",
        "repo": "nixpkgs",
-        "rev": "fd281bd6b7d3e32ddfa399853946f782553163b5",
+        "rev": "4cba8b53da471aea2ab2b0c1f30a81e7c451f4b6",
        "type": "github"
      },
      "original": {
@ -463,11 +463,11 @@
    },
    "nixpkgs_4": {
      "locked": {
-        "lastModified": 1712439257,
-        "narHash": "sha256-aSpiNepFOMk9932HOax0XwNxbA38GOUVOiXfUVPOrck=",
+        "lastModified": 1712608508,
+        "narHash": "sha256-vMZ5603yU0wxgyQeHJryOI+O61yrX2AHwY6LOFyV1gM=",
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "ff0dbd94265ac470dda06a657d5fe49de93b4599",
+        "rev": "4cba8b53da471aea2ab2b0c1f30a81e7c451f4b6",
        "type": "github"
      },
      "original": {
@ -567,11 +567,11 @@
    "rust-analyzer-src": {
      "flake": false,
      "locked": {
-        "lastModified": 1712156296,
-        "narHash": "sha256-St7ZQrkrr5lmQX9wC1ZJAFxL8W7alswnyZk9d1se3Us=",
+        "lastModified": 1712663608,
+        "narHash": "sha256-tN9ZL6kGppmHg84lxlpAlaN+kXWNctKK7Yitq/iXDEw=",
        "owner": "rust-lang",
        "repo": "rust-analyzer",
-        "rev": "8e581ac348e223488622f4d3003cb2bd412bf27e",
+        "rev": "a5feb4f05f09adca661c869b1bf2324898cbaa43",
        "type": "github"
      },
      "original": {
--- a/flake.nix
+++ b/flake.nix
@ -11,24 +11,38 @@
    extra-substituters = "https://devenv.cachix.org";
  };

-  outputs = { self, nixpkgs, devenv, systems, ... } @ inputs:
-    let
+  outputs = {
+    self,
+    nixpkgs,
+    devenv,
+    systems,
+    ...
+  } @ inputs: let
    forEachSystem = nixpkgs.lib.genAttrs (import systems);
-    in
-    {
-      devShells = forEachSystem
-        (system:
-          let
+  in {
+    devShells =
+      forEachSystem
+      (system: let
        pkgs = nixpkgs.legacyPackages.${system};
-          in
-          {
+      in {
        default = devenv.lib.mkShell {
          inherit inputs pkgs;
          modules = [
-                ({pkgs, config, ...}: {
+            ({
+              pkgs,
+              config,
+              ...
+            }: {
              languages.rust = {
                enable = true;
                channel = "nightly";
+                components = [
+                  "rustc"
+                  "cargo"
+                  "clippy"
+                  "rustfmt"
+                  "rust-src"
+                ];
              };

              pre-commit.hooks = {
@ -37,11 +51,15 @@
              };

              packages = with pkgs; [
-                    just nushell
+                just
+                nushell
                ripgrep
-                    typst typst-lsp
+                typst
+                typst-lsp
                mold
-                    cargo-nextest cargo-watch
+                cargo-nextest
+                cargo-watch
+                rust-analyzer
              ];
            })
          ];
--- a/testfiles/test.owo
+++ b/testfiles/test.owo
@ -1,7 +1,4 @@
-def blend1 = [ 
-		open "test.png", 
-		open "test2.png" 
-	] 
-	| blend multiply 0.6
-
-def blend2 = open "test.png" | blend multiply 0.6 [ open test2.png ]
+meow mew meow 5 3.14 "uwu" { 
+	meow: test 24
+	another: hi "hello",
+} "awa"