feat(ir): actually get toposort working

2025-11-17 17:19:41 +01:00 · 2024-01-19 02:59:15 +01:00 · 2024-01-19 02:59:15 +01:00 · fa2893bc77
commit fa2893bc77
parent 4fd35736d5
1 changed files with 75 additions and 78 deletions
--- a/crates/ir/src/lib.rs
+++ b/crates/ir/src/lib.rs
@ -1,6 +1,5 @@
-use std::{collections::BTreeSet, iter, ops::RangeInclusive};
+use std::{num::NonZeroUsize, ops::RangeInclusive};
 use either::Either;
 use instruction::SocketCount;
 use serde::{Deserialize, Serialize};
@ -9,7 +8,7 @@ pub mod instruction;
 pub mod semi_human;
 pub type Map<K, V> = std::collections::BTreeMap<K, V>;
-pub type Set<V> = std::collections::BTreeSet<V>;
+pub type Set<T> = std::collections::BTreeSet<T>;
 /// Gives you a super well typed graph IR for a given human-readable repr.
 ///
@ -41,8 +40,8 @@ pub fn from_ron(source: &str) -> ron::error::SpannedResult<GraphIr> {
 ///   to come back to an already visited node.
 ///
 /// Here, if an edge points from _A_ to _B_ (`A --> B`),
-/// then _A_ is called a **dependency** of _B_,
+/// then _A_ is called a **dependency** or an **input source** of _B_,
-/// and _B_ is called a **dependent** of _A_.
+/// and _B_ is called a **dependent** or an **output target** of _A_.
 ///
 /// The DAG also enables another neat operation:
 /// [Topological sorting](https://en.wikipedia.org/wiki/Topological_sorting).
@ -69,33 +68,8 @@ pub struct GraphIr {
    rev_edges: Map<id::Input, id::Output>,
 }
 // TODO: this impl block, but actually the whole module, screams for tests
 impl GraphIr {
    /// Look "forwards" in the graph to see what other instructions this instruction feeds into.
    ///
    /// Returns [`None`] if `subject` is not an instruction in this graph IR.
    ///
    /// The output slots represent the top-level iterator,
    /// and each one's connections are emitted one level below.
    ///
    /// Just [`Iterator::flatten`] if you are not interested in the slots.
    ///
    /// The same caveats as for [`GraphIr::resolve`] apply.
    #[must_use]
    pub fn dependents(
        &self,
        subject: &id::Instruction,
    ) -> Option<impl Iterator<Item = impl Iterator<Item = &id::Instruction>> + '_> {
        // look the instruction up, and bail out with `None` if the graph doesn't know it
        let (subject, kind) = self.instructions.get_key_value(subject)?;
        // NOTE(review): this takes the *input* count, yet the loop below builds
        // `id::Output` sockets from it. It looks like `outputs` was intended;
        // as written, an instruction with fewer inputs than outputs
        // would have some of its output slots skipped -- confirm.
        let SocketCount { inputs, .. } = kind.socket_count();
        Some((0..inputs).map(|idx| {
            let output = id::Output(socket(subject, idx));
            // an output without an edge entry yields an empty inner iterator;
            // `Either` lets both cases share a single iterator type
            self.edges
                .get(&output)
                .map_or(Either::Right(iter::empty()), |targets| {
                    Either::Left(targets.iter().map(|input| &input.socket().belongs_to))
                })
        }))
    }
    /// Look "backwards" in the graph,
    /// and find out what instructions need to be done before this one.
    /// The input slots are visited in order.
@ -105,22 +79,41 @@ impl GraphIr {
    ///
    /// The same caveats as for [`GraphIr::resolve`] apply.
    #[must_use]
-    pub fn dependencies(
+    pub fn input_sources(
        &self,
        subject: &id::Instruction,
-    ) -> Option<impl Iterator<Item = Option<&id::Instruction>> + '_> {
+    ) -> Option<impl Iterator<Item = Option<&id::Output>> + '_> {
        let (subject, kind) = self.instructions.get_key_value(subject)?;
        let SocketCount { inputs, .. } = kind.socket_count();
        Some((0..inputs).map(|idx| {
            let input = id::Input(socket(subject, idx));
-            self.rev_edges
+            self.rev_edges.get(&input)
-                .get(&input)
+        }))
-                .map(|output| &output.socket().belongs_to)
+    }
    /// Look "forwards" in the graph to see which inputs this instruction's outputs feed into.
    ///
    /// Returns [`None`] if `subject` is not an instruction in this graph IR.
    ///
    /// Otherwise, the returned iterator yields one item per output slot, in slot order.
    /// Each item is the set of inputs recorded for that output,
    /// or [`None`] if the graph has no edge entry for that output.
    ///
    /// [`Iterator::flatten`] once to skip the outputs without an entry,
    /// and a second time to get at the individual inputs
    /// if you are not interested in the slots.
    ///
    /// The same caveats as for [`GraphIr::resolve`] apply.
    #[must_use]
    pub fn output_targets(
        &self,
        subject: &id::Instruction,
    ) -> Option<impl Iterator<Item = Option<&Set<id::Input>>> + '_> {
        // continue with the key stored in the map rather than the caller's reference,
        // so that the returned iterator borrows from `self` only
        let (subject, kind) = self.instructions.get_key_value(subject)?;
        let SocketCount { outputs, .. } = kind.socket_count();
        Some((0..outputs).map(|idx| {
            // one lookup per output slot; the set of connected inputs is handed out as-is
            let output = id::Output(socket(subject, idx));
            self.edges.get(&output)
        }))
    }
    // TODO: this function, but actually the whole module, screams for tests
    /// Returns the instruction corresponding to the given ID.
    /// Returns [`None`] if there is no such instruction in this graph IR.
    ///
@ -133,33 +126,14 @@ impl GraphIr {
    pub fn resolve<'ir>(&'ir self, subject: &id::Instruction) -> Option<Instruction<'ir>> {
        let (id, kind) = self.instructions.get_key_value(subject)?;
-        // just try each slot and see if it's connected
+        let input_sources = self.input_sources(subject)?.collect();
-        // very crude, but it works for a proof of concept
+        let output_targets = self.output_targets(subject)?.collect();
        let SocketCount { inputs, outputs } = kind.socket_count();
        let socket = |id: &id::Instruction, idx| id::Socket {
            belongs_to: id.clone(),
            // impossible since the length is limited to a u16 already
            #[allow(clippy::cast_possible_truncation)]
            idx: id::SocketIdx(idx as u16),
        };
        let mut inputs_from = vec![None; inputs.into()];
        for (idx, slot) in inputs_from.iter_mut().enumerate() {
            let input = id::Input(socket(id, idx));
            *slot = self.rev_edges.get(&input);
        }
        let mut outputs_to = vec![None; outputs.into()];
        for (idx, slot) in outputs_to.iter_mut().enumerate() {
            let output = id::Output(socket(id, idx));
            *slot = self.edges.get(&output);
        }
        Some(Instruction {
            id,
            kind,
-            inputs_from,
+            input_sources,
-            outputs_to,
+            output_targets,
        })
    }
@ -187,15 +161,18 @@ impl GraphIr {
    ///
    /// Panics if there are any cycles in the IR, as it needs to be a DAG.
    #[must_use]
-    // yes, this function could actually return an iterator and be lazy
+    // yes, this function could probably return an iterator and be lazy
    // no, not today
    pub fn topological_sort(&self) -> Vec<Instruction> {
        // count how many incoming edges each vertex has
-        let nonzero_input_counts: Map<_, usize> =
+        let mut nonzero_input_counts: Map<_, NonZeroUsize> =
            self.rev_edges
                .iter()
                .fold(Map::new(), |mut count, (input, _)| {
-                    *count.entry(input.socket().belongs_to.clone()).or_default() += 1;
+                    let _ = *count
                        .entry(input.socket().belongs_to.clone())
                        .and_modify(|count| *count = count.saturating_add(1))
                        .or_insert(NonZeroUsize::MIN);
                    count
                });
@ -204,32 +181,52 @@ impl GraphIr {
        let no_inputs: Vec<_> = {
            let nonzero: Set<_> = nonzero_input_counts.keys().collect();
            let all: Set<_> = self.instructions.keys().collect();
-            all.difference(&nonzero).copied().collect()
+            all.difference(&nonzero).copied().cloned().collect()
        };
        let mut active_queue = no_inputs;
        // then let's find the order!
        let mut order = Vec::new();
        let mut active_queue = no_inputs;
        while let Some(current) = active_queue.pop() {
            // now that this vertex is visited and resolved,
            // make sure all dependents notice that
-            for dependent in self
+            let dependents = self
-                .dependents(current)
+                .output_targets(&current)
                .expect("graph to be consistent")
                .flatten()
-            {
+                .flatten();
-                dbg!(dependent);
+
            for dependent_input in dependents {
                let dependent = &dependent_input.socket().belongs_to;
                // how many inputs are connected to this dependent without us?
                let count = nonzero_input_counts
                    .get_mut(dependent)
                    .expect("connected output must refer to non-zero input");
                let new = NonZeroUsize::new(count.get() - 1);
                if let Some(new) = new {
                    // aww, still some
                    *count = new;
                    continue;
                }
                // none, that means this one is free now! let's throw it onto the active queue then
                let (now_active, _) = nonzero_input_counts
                    .remove_entry(dependent)
                    .expect("connected output must refer to non-zero input");
                active_queue.push(now_active);
            }
            // TODO: check if this instruction is "well-fed", that is, has all the inputs it needs,
            //       and if not, panic
-            order.push(self.resolve(current).expect("graph to be consistent"));
+            order.push(self.resolve(&current).expect("graph to be consistent"));
        }
        assert!(
-            !nonzero_input_counts.is_empty(),
+            nonzero_input_counts.is_empty(),
            concat!(
                "topological sort didn't cover all instructions\n",
                "either there are unconnected inputs, or there is a cycle\n",
@ -250,8 +247,8 @@ pub struct Instruction<'ir> {
    pub kind: &'ir instruction::Kind,
    // can't have these two public since then a user might corrupt their length
-    inputs_from: Vec<Option<&'ir id::Output>>,
+    input_sources: Vec<Option<&'ir id::Output>>,
-    outputs_to: Vec<Option<&'ir BTreeSet<id::Input>>>,
+    output_targets: Vec<Option<&'ir Set<id::Input>>>,
 }
 impl<'ir> Instruction<'ir> {
@ -260,14 +257,14 @@ impl<'ir> Instruction<'ir> {
    /// [`None`] means that this input is unfilled,
    /// and must be filled before the instruction can be run.
    #[must_use]
-    pub fn inputs_from(&self) -> &[Option<&'ir id::Output>] {
+    pub fn input_sources(&self) -> &[Option<&'ir id::Output>] {
-        &self.inputs_from
+        &self.input_sources
    }
-    /// To whom outputs are sent. [`None`] means that this output is unused.
+    /// To whom outputs are sent.
    #[must_use]
-    pub fn outputs_to(&self) -> &[Option<&'ir BTreeSet<id::Input>>] {
+    pub fn output_targets(&self) -> &[Option<&'ir Set<id::Input>>] {
-        &self.outputs_to
+        &self.output_targets
    }
 }