lang: basic ast work

This commit is contained in:
Schrottkatze 2024-06-05 18:00:14 +02:00
parent cfefab9fd0
commit d6bc644fb6
No known key found for this signature in database
13 changed files with 560 additions and 58 deletions

68
crates/lang/src/ast.rs Normal file
View file

@ -0,0 +1,68 @@
use std::{collections::HashMap, path::PathBuf, sync::Arc};
use rowan::ast::{AstNode, AstPtr};
use self::{
error::{Error, WorldCreationError},
mod_tree::{Module, ModuleTree},
source_file::SourceFile,
};
mod error;
mod mod_tree;
mod nodes;
mod source_file;
/// Location of a syntax element: the file it belongs to plus a pointer to the node.
///
/// The node is stored as an `AstPtr` rather than as the node itself.
struct Loc<T: AstNode> {
/// Path of the file the element was found in.
file: PathBuf,
/// Pointer to the element's AST node.
syntax_el: AstPtr<T>,
}
impl<T: AstNode> Loc<T> {
    /// Builds a location for `syntax_el`, which belongs to `file`.
    ///
    /// Only an `AstPtr` created from the node is kept, not the node itself.
    pub fn new(file: PathBuf, syntax_el: &T) -> Self {
        let syntax_el = AstPtr::new(syntax_el);
        Self { file, syntax_el }
    }
}
/// Everything loaded starting from a single entry-point file.
pub struct World {
/// Canonicalized path of the entry-point file.
entry_point: PathBuf,
/// Parsed source files keyed by their path (the entry point is included).
files: HashMap<PathBuf, SourceFile>,
/// Errors collected while parsing the entry point and loading its modules.
errors: Vec<Error>,
/// Module hierarchy discovered from the entry point.
module_tree: ModuleTree,
}
impl World {
    /// Creates a world from the entry-point file at `entry_point`.
    ///
    /// The path is canonicalized, the file is read and parsed, and the module
    /// tree is built through [`ModuleTree::parse_from_main`]. Errors found
    /// while parsing are stored in the returned world rather than returned as
    /// `Err`.
    ///
    /// # Errors
    ///
    /// Returns [`WorldCreationError::FailedToOpenEntryPoint`] when the path
    /// cannot be canonicalized (for example because it does not exist) or the
    /// file cannot be read.
    pub fn new(entry_point: PathBuf) -> Result<Self, WorldCreationError> {
        // A missing or inaccessible path is a user error, not a bug: report it
        // through the error type instead of panicking.
        let entry_point = match entry_point.canonicalize() {
            Ok(canonical) => canonical,
            Err(e) => return Err(WorldCreationError::FailedToOpenEntryPoint(entry_point, e)),
        };

        let source = match std::fs::read_to_string(&entry_point) {
            Ok(f) => f,
            Err(e) => return Err(WorldCreationError::FailedToOpenEntryPoint(entry_point, e)),
        };

        let (src, mut errors) = SourceFile::parse_from(entry_point.clone(), source);

        let (module_tree, mut files, new_errors) = ModuleTree::parse_from_main(&entry_point, &src);
        errors.extend(new_errors);

        // Debug output: the module tree goes to stdout, the errors to stderr.
        module_tree.print_tree(src.tree());
        dbg!(&errors);

        // The entry point itself is part of the file set as well.
        let _ = files.insert(entry_point.clone(), src);

        Ok(Self {
            files,
            entry_point,
            errors,
            module_tree,
        })
    }
    // fn parse_mod_tree(files: &mut HashMap<PathBuf, SourceFile>)
}
// struct SourceFile {
// tree: SyntaxNode,
// items: Vec<TopLevelItem>,
// }

View file

@ -0,0 +1,30 @@
use std::{fmt::Display, path::PathBuf};
use crate::lst_parser::error::SyntaxError;
/// Errors collected while loading and parsing source files.
#[derive(Debug)]
pub enum Error {
/// A syntax error, paired with the path of the file it was reported for.
Syntax(PathBuf, SyntaxError),
/// The file backing a file module could not be read; holds the attempted path and the I/O error.
FailedToOpenFileMod(PathBuf, std::io::Error),
}
impl Error {
pub fn from_syntax(file: PathBuf, e: SyntaxError) -> Self {
Self::Syntax(file, e)
}
}
/// Reasons why building a `World` can fail.
#[derive(Debug)]
pub enum WorldCreationError {
    /// The entry-point file could not be opened; holds its path and the I/O error.
    FailedToOpenEntryPoint(PathBuf, std::io::Error),
}

impl Display for WorldCreationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::FailedToOpenEntryPoint(entry_path, e) => {
                // `Path::display` prints the bare path; the `Debug` form would add
                // its own double quotes inside the single quotes of the message.
                write!(
                    f,
                    "failed to open entry_point '{}': {e}",
                    entry_path.display()
                )
            }
        }
    }
}

// The underlying I/O error is already part of the `Display` output, so no
// separate `source` is exposed.
impl std::error::Error for WorldCreationError {}

View file

@ -0,0 +1,248 @@
use std::{
collections::{HashMap, HashSet},
path::PathBuf,
sync::Arc,
};
use rowan::ast::{AstNode, AstPtr};
use crate::{lst_parser::syntax_kind::SyntaxKind, SyntaxNode};
use super::{
error::Error,
nodes::{self, Mod, ModBody, ModName, Root},
source_file::SourceFile,
Loc,
};
/// The modules declared at the top level of the entry file, each with its own submodules.
pub struct ModuleTree {
/// Top-level modules keyed by module name.
modules: HashMap<String, Arc<Module>>,
}
impl ModuleTree {
    /// Builds the module tree for the entry file `main_file` located at `path`.
    ///
    /// Every top-level module of the file is parsed (recursively) via
    /// `Module::parse_mod`. Returns the tree, the source files loaded along the
    /// way, and the errors that were collected; a module that fails to parse
    /// contributes an error and is left out of the tree.
    ///
    /// # Panics
    ///
    /// Panics if `path` has no parent directory.
    pub fn parse_from_main(
        path: &PathBuf,
        main_file: &SourceFile,
    ) -> (Self, HashMap<PathBuf, SourceFile>, Vec<Error>) {
        let root = main_file.tree();
        let mut files = HashMap::new();
        let mut errors = Vec::new();
        // File modules are resolved relative to the directory of the entry file.
        let entry_dir = path.parent().unwrap().to_owned();

        let mut modules: HashMap<String, Arc<Module>> = HashMap::new();
        for ptr in main_file.top_level_modules() {
            let node = ptr.to_node(root.syntax());
            match Module::parse_mod(node, Vec::new(), path, &entry_dir) {
                Ok((module, mod_files, mod_errors)) => {
                    files.extend(mod_files);
                    errors.extend(mod_errors);
                    modules.insert(module.name(), Arc::new(module));
                }
                Err(err) => errors.push(err),
            }
        }

        (Self { modules }, files, errors)
    }

    /// Prints the module hierarchy to stdout under a root entry called `main`.
    ///
    /// `lst` is currently unused.
    pub fn print_tree(&self, lst: &Root) {
        print_tree("main", &self.modules, 0)
    }
}
/// A single module, declared either inline or as a separate file.
pub struct Module {
/// Names of the enclosing modules, outermost first (this module's own name is not included).
path: Vec<String>,
/// Name of this module.
name: String,
/// Whether the module is inline or file-backed, plus where it was declared.
kind: ModuleKind,
/// Direct submodules keyed by name.
children: HashMap<String, Arc<Module>>,
/// Parent module; the constructors in this file always set this to `None`.
parent: Option<Arc<Module>>,
}
impl Module {
fn parse_mod(
module: Mod,
cur_path: Vec<String>,
cur_file: &PathBuf,
entry_path: &PathBuf,
) -> Result<(Self, HashMap<PathBuf, SourceFile>, Vec<Error>), Error> {
dbg!(cur_file);
dbg!(entry_path);
let children = module
.syntax()
.children()
// .map(|n| n.kind())
.collect::<Vec<_>>();
if children.len() == 1 {
let name = &children[0];
assert_eq!(name.kind(), SyntaxKind::MODULE_NAME);
return Self::parse_file_mod(
name.text().to_string(),
Loc::new(cur_file.clone(), &module),
cur_path,
entry_path,
);
} else if children.len() == 2 {
let name = &children[0];
assert_eq!(name.kind(), SyntaxKind::MODULE_NAME);
let body = &children[1];
assert_eq!(body.kind(), SyntaxKind::MODULE_BODY);
return Ok(Self::parse_inline_mod(
module, cur_path, cur_file, entry_path,
));
}
todo!()
}
fn parse_file_mod(
name: String,
decl: Loc<Mod>,
mut cur_path: Vec<String>,
entry_path: &PathBuf,
) -> Result<(Self, HashMap<PathBuf, SourceFile>, Vec<Error>), Error> {
let mut mod_file_path = entry_path.to_owned();
for el in &cur_path {
mod_file_path.push(format!("{el}/"));
}
mod_file_path.push(format!("{name}.owo"));
let mut files = HashMap::new();
let mut errors = Vec::new();
let source = match std::fs::read_to_string(dbg!(&mod_file_path)) {
Ok(f) => f,
Err(e) => return Err(Error::FailedToOpenFileMod(mod_file_path, e)),
};
let (source_file, file_errors) = SourceFile::parse_from(mod_file_path.clone(), source);
errors.extend(file_errors);
let tree = source_file.tree();
let old_path = cur_path.clone();
cur_path.push(name.clone());
let children = source_file
.top_level_modules()
.into_iter()
.filter_map(|m| {
let module = Module::parse_mod(
m.to_node(tree.syntax()),
cur_path.clone(),
&mod_file_path,
&entry_path,
);
match module {
Ok(module) => {
files.extend(module.1);
errors.extend(module.2);
Some((module.0.name(), Arc::new(module.0)))
}
Err(err) => {
errors.push(err);
None
}
}
})
.collect::<HashMap<String, Arc<Module>>>();
files.insert(mod_file_path.clone(), source_file);
Ok((
Self {
path: old_path,
name,
kind: ModuleKind::File {
declaration: decl,
file_id: mod_file_path,
},
children,
parent: None,
},
files,
errors,
))
}
fn parse_inline_mod(
module: Mod,
mut cur_path: Vec<String>,
cur_file: &PathBuf,
entry_path: &PathBuf,
) -> (Self, HashMap<PathBuf, SourceFile>, Vec<Error>) {
let mut children = module.syntax().children().collect::<Vec<_>>();
let body = ModBody::cast(children.pop().unwrap()).unwrap();
let name = ModName::cast(children.pop().unwrap()).unwrap();
let mut files = HashMap::new();
let mut errors = Vec::new();
let old_path = cur_path.clone();
cur_path.push(name.syntax().to_string());
let children = body
.syntax()
.children()
.filter_map(|node| Mod::cast(node))
.filter_map(|m| {
let m = Self::parse_mod(m, cur_path.clone(), cur_file, entry_path);
match m {
Ok(module) => {
files.extend(module.1);
errors.extend(module.2);
Some((module.0.name(), Arc::new(module.0)))
}
Err(err) => {
errors.push(err);
None
}
}
})
.collect::<HashMap<String, Arc<Module>>>();
(
Self {
name: name.syntax().text().to_string(),
kind: ModuleKind::Inline(Loc::new(cur_file.to_owned(), &module)),
children,
parent: None,
path: old_path,
},
files,
errors,
)
}
pub fn name(&self) -> String {
// self.name.to_node(lst.syntax()).syntax().text().to_string()
self.name.clone()
}
}
/// Prints `name` indented by `level` steps, followed by all of `children`
/// (recursively) one level deeper.
///
/// Children are printed sorted by name so that the output is the same on
/// every run; plain `HashMap` iteration order is not stable.
fn print_tree(name: &str, children: &HashMap<String, Arc<Module>>, level: u32) {
    const INDENT_STR: &str = " ";

    println!("{}{name}", INDENT_STR.repeat(level as usize));

    let mut sorted: Vec<_> = children.iter().collect();
    sorted.sort_unstable_by(|a, b| a.0.cmp(b.0));
    for (name, module) in sorted {
        print_tree(name, &module.children, level + 1);
    }
}
/// How a module was declared.
enum ModuleKind {
/// Declared with a body in place; holds the location of the `mod` declaration.
Inline(Loc<nodes::Mod>),
/// Declared by name only and loaded from a separate file.
File {
/// Location of the `mod` declaration that refers to the file.
declaration: Loc<nodes::Mod>,
/// Path of the file the module was loaded from.
file_id: PathBuf,
},
}

View file

@ -0,0 +1,77 @@
use crate::lst_parser::syntax_kind::SyntaxKind::*;
use crate::SyntaxNode;
use rowan::Language;
/// Generates typed AST wrappers around `SyntaxNode`.
///
/// Takes a `;`-separated list of `TypeName, SYNTAX_KIND` pairs (a trailing `;`
/// is allowed). For each pair it declares a public newtype `TypeName` and
/// implements `rowan::ast::AstNode` for it so that casting succeeds exactly
/// for nodes of kind `SYNTAX_KIND`.
macro_rules! ast_nodes {
    ($($ast:ident, $kind:ident);+ $(;)?) => {
        $(
            /// Typed wrapper around a syntax node of one specific kind.
            #[derive(Debug, Clone, PartialEq, Eq, Hash)]
            #[repr(transparent)]
            pub struct $ast(SyntaxNode);

            impl rowan::ast::AstNode for $ast {
                type Language = crate::Lang;

                fn can_cast(kind: <Self::Language as Language>::Kind) -> bool {
                    kind == $kind
                }

                fn cast(node: SyntaxNode) -> Option<Self> {
                    // Single source of truth for the kind check: `can_cast`.
                    Self::can_cast(node.kind()).then(|| Self(node))
                }

                fn syntax(&self) -> &SyntaxNode {
                    &self.0
                }
            }
        )+
    };
}
// One typed wrapper per syntax kind, written as `TypeName, SYNTAX_KIND` pairs.
ast_nodes!(
// Definitions.
Def, DEF;
DefName, DEF_NAME;
DefBody, DEF_BODY;
// Modules.
Mod, MODULE;
ModName, MODULE_NAME;
ModBody, MODULE_BODY;
// `use` items and their patterns.
Use, USE;
UsePat, USE_PAT;
PatItem, PAT_ITEM;
PatGlob, PAT_GLOB;
PatGroup, PAT_GROUP;
// Literals.
Literal, LITERAL;
IntLit, INT_NUM;
FloatLit, FLOAT_NUM;
StringLit, STRING;
// Collections.
Matrix, MATRIX;
MatrixRow, MAT_ROW;
Vector, VEC;
List, LIST;
CollectionItem, COLLECTION_ITEM;
// Expressions, pipelines and instructions.
ParenthesizedExpr, PARENTHESIZED_EXPR;
Expression, EXPR;
Pipeline, PIPELINE;
Instruction, INSTR;
InstructionName, INSTR_NAME;
InstructionParams, INSTR_PARAMS;
// Attribute sets.
AttributeSet, ATTR_SET;
Attribute, ATTR;
AttributeName, ATTR_NAME;
AttributeValue, ATTR_VALUE;
// Error nodes and structural kinds.
ParseError, PARSE_ERR;
LexError, LEX_ERR;
Root, ROOT;
Eof, EOF
);

View file

@ -0,0 +1,80 @@
use std::path::PathBuf;
use rowan::ast::{AstNode, AstPtr};
use crate::lst_parser::{
grammar::source_file,
input::Input,
output::Output,
syntax_kind::{self, SyntaxKind},
Parser,
};
use super::{
error::Error,
nodes::{Def, Mod, Root, Use},
};
/// A parsed source file together with pointers to its top-level items.
pub struct SourceFile {
/// Root node of the file's syntax tree.
lst: Root,
/// Pointers to the `MODULE` nodes that are direct children of the root.
modules: Vec<AstPtr<Mod>>,
/// Pointers to the `DEF` nodes that are direct children of the root.
defs: Vec<AstPtr<Def>>,
/// Pointers to the `USE` nodes that are direct children of the root.
uses: Vec<AstPtr<Use>>,
}
impl SourceFile {
    /// Lexes and parses `source_text` and indexes its top-level items.
    ///
    /// Returns the parsed file plus every syntax error reported by the parser
    /// output, each tagged with `path`.
    pub fn parse_from(path: PathBuf, source_text: String) -> (Self, Vec<Error>) {
        let toks = syntax_kind::lex(&source_text);
        let mut parser = Parser::new(Input::new(&toks));
        source_file(&mut parser);

        // The parser has to be finished before `toks` can be handed over.
        let parser_out = parser.finish();
        let out = Output::from_parser_output(toks, parser_out);
        let lst = out.syntax();

        let (mut modules, mut defs, mut uses) = (Vec::new(), Vec::new(), Vec::new());

        // Only direct children of the root are indexed; other kinds are skipped.
        for child in lst.children() {
            let kind = child.kind();
            if kind == SyntaxKind::MODULE {
                modules.push(AstPtr::new(&Mod::cast(child).unwrap()));
            } else if kind == SyntaxKind::DEF {
                defs.push(AstPtr::new(&Def::cast(child).unwrap()));
            } else if kind == SyntaxKind::USE {
                uses.push(AstPtr::new(&Use::cast(child).unwrap()));
            }
        }

        let file = Self {
            lst: Root::cast(lst).unwrap(),
            modules,
            defs,
            uses,
        };
        let errors = out
            .errors()
            .into_iter()
            .map(|err| Error::from_syntax(path.clone(), err))
            .collect();

        (file, errors)
    }

    /// Root node of this file's syntax tree.
    pub fn tree(&self) -> &Root {
        &self.lst
    }

    /// Copies of the pointers to the top-level `mod` items.
    pub fn top_level_modules(&self) -> Vec<AstPtr<Mod>> {
        self.modules.to_vec()
    }

    /// Copies of the pointers to the top-level `def` items.
    pub fn top_level_defs(&self) -> Vec<AstPtr<Def>> {
        self.defs.to_vec()
    }

    /// Copies of the pointers to the top-level `use` items.
    pub fn top_level_uses(&self) -> Vec<AstPtr<Use>> {
        self.uses.to_vec()
    }
}

View file

@ -1,2 +1,24 @@
#![feature(type_alias_impl_trait, lint_reasons)]
use crate::lst_parser::syntax_kind::SyntaxKind;
/// Marker type for this crate's language; it has no variants and exists only
/// to carry the `rowan::Language` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Lang {}
impl rowan::Language for Lang {
    type Kind = SyntaxKind;

    /// Converts a raw rowan kind back into a [`SyntaxKind`].
    ///
    /// # Panics
    ///
    /// Panics if the raw value is greater than `SyntaxKind::ROOT as u16`.
    #[allow(unsafe_code)]
    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
        let rowan::SyntaxKind(discriminant) = raw;
        assert!(discriminant <= SyntaxKind::ROOT as u16);
        // SAFETY (NOTE(review)): presumably `SyntaxKind` is `#[repr(u16)]` with
        // contiguous discriminants starting at 0, so every value up to `ROOT`
        // is a valid variant — confirm against the enum definition.
        unsafe { std::mem::transmute::<u16, SyntaxKind>(discriminant) }
    }

    /// Converts a [`SyntaxKind`] into rowan's raw kind.
    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
        rowan::SyntaxKind::from(kind)
    }
}
/// Syntax tree node specialised to this crate's [`Lang`].
pub type SyntaxNode = rowan::SyntaxNode<Lang>;
/// Syntax tree token (leaf) specialised to this crate's [`Lang`].
///
/// This must alias rowan's token type, not its node type; otherwise
/// [`SyntaxElement`] would be "node or node".
pub type SyntaxToken = rowan::SyntaxToken<Lang>;
/// Either a node or a token of the syntax tree.
pub type SyntaxElement = rowan::NodeOrToken<SyntaxNode, SyntaxToken>;
pub mod ast;
pub mod lst_parser;

View file

@ -3,9 +3,9 @@ use owo_colors::{unset_override, OwoColorize};
use rowan::{GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken};
use std::mem;
use crate::lst_parser::{
input::MEANINGLESS_TOKS,
syntax_kind::{Lang, SyntaxKind},
use crate::{
lst_parser::{input::MEANINGLESS_TOKS, syntax_kind::SyntaxKind},
Lang, SyntaxNode,
};
use super::{
@ -201,4 +201,12 @@ impl Output {
errors,
}
}
/// Creates a fresh root `SyntaxNode` from a clone of the stored green node.
pub fn syntax(&self) -> SyntaxNode {
    let green = self.green_node.clone();
    SyntaxNode::new_root(green)
}
/// Returns a copy of the syntax errors stored in this output.
pub fn errors(&self) -> Vec<SyntaxError> {
    self.errors.to_vec()
}
}

View file

@ -53,7 +53,6 @@ pub enum SyntaxKind {
LIST,
// either of a vec, a matrix or a list
COLLECTION_ITEM,
DECL,
PARENTHESIZED_EXPR,
EXPR,
LITERAL,
@ -130,7 +129,6 @@ pub enum SyntaxKind {
ROOT,
EOF,
TOMBSTONE,
ERROR,
}
pub type TokenSet = EnumSet<SyntaxKind>;
@ -140,17 +138,3 @@ impl From<SyntaxKind> for rowan::SyntaxKind {
Self(kind as u16)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Lang {}
impl rowan::Language for Lang {
type Kind = SyntaxKind;
#[allow(unsafe_code)]
fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
assert!(raw.0 <= SyntaxKind::ROOT as u16);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
kind.into()
}
}

View file

@ -1,7 +1,10 @@
use clap::Parser;
use std::{fs, path::PathBuf};
use lang::lst_parser::{self, grammar, input, output::Output, syntax_kind};
use lang::{
ast::World,
lst_parser::{self, grammar, input, output::Output, syntax_kind},
};
#[derive(Parser)]
struct Args {
@ -12,37 +15,18 @@ struct Args {
fn main() {
let args = Args::parse();
let n = args.file.clone();
let f = fs::read_to_string(n.clone()).expect("failed to read file");
// let f = fs::read_to_string(n.clone()).expect("failed to read file");
let toks = dbg!(syntax_kind::lex(&f));
let input = input::Input::new(&toks);
let mut parser = lst_parser::Parser::new(input);
// let toks = dbg!(syntax_kind::lex(&f));
// let input = input::Input::new(&toks);
// let mut parser = lst_parser::Parser::new(input);
grammar::source_file(&mut parser);
// grammar::source_file(&mut parser);
let p_out = dbg!(parser.finish());
let o = Output::from_parser_output(toks, p_out);
// let p_out = dbg!(parser.finish());
// let o = Output::from_parser_output(toks, p_out);
println!("{}", o.debug_colored());
// println!("{}", o.debug_colored());
// let parse_res = parser::parse(&f);
// println!("parse: {:?}", parse_res);
// dbg!(lex::lex(&f));
// let mut err_collector = ErrorCollector::new(vec![(n.to_str().unwrap(), &f)]);
// println!("file: {f}\n");
// let parse_res = parse(&f);
// err_collector.insert_many(
// args.file.to_str().unwrap(),
// lang::err_reporting::Stage::Parse,
// parse_res
// .errors()
// .into_iter()
// .map(|e| e.to_owned())
// .collect::<Vec<_>>(),
// );
// err_collector.report_raw();
// println!("res: {:?}", parse_res);
World::new(n);
}

1
testfiles/hello.owo Normal file
View file

@ -0,0 +1 @@
mod meow;

2
testfiles/hello/meow.owo Normal file
View file

@ -0,0 +1,2 @@
mod mrawr {}
mod mrow {}

View file

@ -1,11 +1,8 @@
use hello::meow;
def hello_world = meow [ 1 2 ];
def test
mod hello {
use gay:;uwu_meow::*;
def meow = uwu;
mod hello;
mod world {
mod meow {}
mod uwu {
mod test {}
mod meow {}
}
}

1
testfiles/uwu.owo Normal file
View file

@ -0,0 +1 @@
mod meow {}