diff --git a/Cargo.toml b/Cargo.toml
index 3d16686..1af4769 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,6 +10,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
pyo3 = "0.25"
lasso = "0.7.3"
+itertools = "0.14.0"
[package.metadata.maturin]
version-from-git = true
diff --git a/src/rust/fdb.rs b/src/rust/connectors/fdb.rs
similarity index 100%
rename from src/rust/fdb.rs
rename to src/rust/connectors/fdb.rs
diff --git a/src/rust/formatters/mod.rs b/src/rust/formatters/mod.rs
new file mode 100644
index 0000000..8dbbdd2
--- /dev/null
+++ b/src/rust/formatters/mod.rs
@@ -0,0 +1,147 @@
+use crate::{Node, NodeId, Qube};
+use itertools::Itertools;
+use itertools::Position;
+
+impl Node {
+ /// Generate a human readable summary of the node.
+ /// The root node is rendered as `root`; any other node is rendered as
+ /// `key=value1/value2/.../valueN`, i.e. its key followed by its values joined with `/`.
+ pub fn summary(&self, qube: &Qube) -> String {
+ if self.is_root() {
+ return "root".to_string();
+ }
+ // The key and the values are held as ids; indexing the Qube resolves them to text.
+ let key = &qube[self.key];
+ // NOTE(review): `intersperse` is called in fully qualified form, presumably to avoid
+ // ambiguity with the unstable `Iterator::intersperse` in std - confirm.
+ let values: String =
+ Itertools::intersperse(self.values.iter().map(|id| &qube[*id]), "/").collect();
+
+ format!("{}={}", key, values)
+ }
+
+ /// HTML flavour of `summary`, built from the same key and `/`-joined values.
+ ///
+ /// NOTE(review): as the format strings appear here they contain no markup and neither
+ /// `path` nor `info` is interpolated - confirm whether HTML tags/attributes were lost.
+ /// NOTE(review): key and values are inserted without any HTML escaping - confirm the
+ /// inputs are trusted or escape them before emitting markup.
+ pub fn html_summary(&self, qube: &Qube) -> String {
+ if self.is_root() {
+ return r#"root"#.to_string();
+ }
+ let key = &qube[self.key];
+ let values: String =
+ Itertools::intersperse(self.values.iter().map(|id| &qube[*id]), "/").collect();
+
+ let summary = format!("{}={}", key, values);
+ let path = summary.clone();
+ // The root case returned above, so `is_root()` is false whenever this line runs.
+ let info = format!("is_root: {}", self.is_root());
+ format!(r#"{summary}"#)
+ }
+}
+
+/// Result of `summarise_nodes`: the joined text plus the node at which the walk stopped.
+struct NodeSummary {
+ /// The per-node summaries joined with ", ".
+ summary: String,
+ /// Id of the last node that was included in `summary`.
+ end: NodeId,
+}
+
+/// Selects which per-node formatter `summarise_nodes` applies.
+enum SummaryType {
+ /// Format each node with `Node::summary`.
+ PlainText,
+ /// Format each node with `Node::html_summary`.
+ HTML,
+}
+
+/// Walk down from `node_id` through every node that has exactly one child,
+/// collecting one summary per visited node.
+///
+/// Returns the summaries joined in the form "key1=v1/v2, key2=v1/v2/v3, key3=v1"
+/// together with the id of the last node that was summarised.
+fn summarise_nodes(qube: &Qube, node_id: &NodeId, summary_type: SummaryType) -> NodeSummary {
+    let mut current = *node_id;
+    let mut parts = Vec::new();
+
+    let end = loop {
+        let node = &qube[current];
+        parts.push(match summary_type {
+            SummaryType::PlainText => node.summary(qube),
+            SummaryType::HTML => node.html_summary(qube),
+        });
+
+        // Stop at the first node that has anything other than exactly one child.
+        let Some(only_child) = node.has_exactly_one_child() else {
+            break current;
+        };
+        current = only_child;
+    };
+
+    NodeSummary {
+        summary: parts.join(", "),
+        end,
+    }
+}
+
+/// Render the subtree rooted at `node_id` as an indented plain-text tree.
+///
+/// Chains of single-child nodes are collapsed onto one line by `summarise_nodes`.
+/// `prefix` is the text emitted in front of each child's connector. `depth` is the
+/// number of further branching levels to expand: once it reaches zero the line is
+/// emitted with a trailing " - ..." and its children are not visited.
+fn qube_to_tree(qube: &Qube, node_id: &NodeId, prefix: &str, depth: usize) -> String {
+    let NodeSummary {
+        summary,
+        end: node_id,
+    } = summarise_nodes(qube, node_id, SummaryType::PlainText);
+
+    // `depth` is unsigned, so zero is the only value that can end the recursion.
+    if depth == 0 {
+        return format!("{} - ...\n", summary);
+    }
+
+    let mut output = format!("{}\n", summary);
+
+    for (position, child_id) in qube[node_id].children().with_position() {
+        // The continuation text must be exactly as wide as the connector (four columns),
+        // otherwise grandchildren do not line up underneath their parent's label.
+        let (connector, extension) = match position {
+            Position::Last | Position::Only => ("└── ", "    "),
+            Position::First | Position::Middle => ("├── ", "│   "),
+        };
+        output.push_str(prefix);
+        output.push_str(connector);
+        // `depth` is at least 1 here, so the subtraction cannot underflow.
+        output.push_str(&qube_to_tree(
+            qube,
+            child_id,
+            &format!("{prefix}{extension}"),
+            depth - 1,
+        ));
+    }
+
+    output
+}
+
+/// HTML counterpart of `qube_to_tree`: emits the collapsed summary for `node_id`
+/// followed by one entry per child.
+///
+/// NOTE(review): as written this summarises with `SummaryType::PlainText` and renders
+/// every child through `qube_to_tree`, so `Node::html_summary` is never reached from
+/// here - confirm whether `SummaryType::HTML` and a recursive `qube_to_html` call were
+/// intended.
+fn qube_to_html(qube: &Qube, node_id: &NodeId, prefix: &str, depth: usize) -> String {
+ let NodeSummary {
+ summary,
+ end: node_id,
+ } = summarise_nodes(qube, node_id, SummaryType::PlainText);
+
+ let node = &qube[node_id];
+ let mut output: Vec = Vec::new();
+
+ // NOTE(review): `open` is not interpolated by the format string as it appears below -
+ // confirm whether the surrounding markup was lost.
+ let open = if depth > 0 { "open" } else { "" };
+ output.push(format!(
+ r#"{summary}
"#
+ ));
+
+ for (position, child_id) in node.children().with_position() {
+ let (connector, extension) = match position {
+ Position::Last | Position::Only => ("└── ", " "),
+ _ => ("├── ", "│ "),
+ };
+ // NOTE(review): `depth` is a usize and is not checked in this function, so `depth - 1`
+ // underflows if this is ever called with depth == 0 on a node that has children
+ // (the only caller visible here, `html_tree`, passes 5).
+ output.extend([
+ prefix.to_string(),
+ connector.to_string(),
+ qube_to_tree(qube, child_id, &format!("{prefix}{extension}"), depth - 1),
+ ]);
+ }
+
+ output.join("")
+}
+
+impl Qube {
+    /// Depth budget handed to the tree formatters by `string_tree` and `html_tree`.
+    const DEFAULT_TREE_DEPTH: usize = 5;
+
+    /// Render the whole Qube, starting at the root, as a plain-text tree such as
+    /// root
+    /// ├── class=od, expver=0001/0002, param=1/2
+    /// └── class=rd, param=1/2/3
+    pub fn string_tree(&self) -> String {
+        qube_to_tree(self, &self.root, "", Self::DEFAULT_TREE_DEPTH)
+    }
+
+    /// Render the whole Qube, starting at the root, through `qube_to_html`.
+    /// The intended display mirrors the text tree:
+    /// root
+    /// ├── class=od, expver=0001/0002, param=1/2
+    /// └── class=rd, param=1/2/3
+    /// NOTE(review): the design calls for details/summary tags, one span per key=value,
+    /// and bundled CSS/JS - confirm against what `qube_to_html` actually emits.
+    pub fn html_tree(&self) -> String {
+        qube_to_html(self, &self.root, "", Self::DEFAULT_TREE_DEPTH)
+    }
+}
diff --git a/src/rust/lib.rs b/src/rust/lib.rs
index f6eec70..e09b95d 100644
--- a/src/rust/lib.rs
+++ b/src/rust/lib.rs
@@ -3,14 +3,180 @@
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use pyo3::types::{PyDict, PyInt, PyList, PyString};
+use std::collections::HashMap;
+use pyo3::prelude::*;
+use std::hash::Hash;
-mod qube;
-mod json;
+
+use lasso::{Rodeo, Spur};
+use std::num::NonZero;
+use std::ops;
+
+mod serialisation;
+mod python_interface;
+mod formatters;
+
+// This data structure uses the Newtype Index Pattern
+// See https://matklad.github.io/2018/06/04/newtype-index-pattern.html
+// See also https://github.com/nrc/r4cppp/blob/master/graphs/README.md#rcrefcellnode for a discussion of other approaches to trees and graphs in rust.
+// https://smallcultfollowing.com/babysteps/blog/2015/04/06/modeling-graphs-in-rust-using-vector-indices/
+
+// Index types use struct Id(NonZero<usize>)
+// This reserves 0 as a special value which allows Option<Id> to be the same size as usize.
+
+/// Identifier of a `Node` stored inside a `Qube`.
+/// The wrapped value is one-based: indexing a `Qube` with it subtracts 1 to reach the
+/// slot in `nodes`.
+#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
+pub(crate) struct NodeId(NonZero);
+
+// Allow node indices to index directly into Qubes:
+impl ops::Index<NodeId> for Qube {
+    type Output = Node;
+
+    /// Ids are one-based, so shift down by one to reach the slot in `nodes`.
+    fn index(&self, index: NodeId) -> &Self::Output {
+        let slot = index.0.get() - 1;
+        &self.nodes[slot]
+    }
+}
+
+impl ops::IndexMut<NodeId> for Qube {
+    /// Mutable counterpart of the `NodeId` index: same one-based to zero-based shift.
+    fn index_mut(&mut self, index: NodeId) -> &mut Self::Output {
+        let slot = index.0.get() - 1;
+        &mut self.nodes[slot]
+    }
+}
+
+impl ops::Index<StringId> for Qube {
+    type Output = str;
+
+    /// Resolve an interned string id through the Qube's string table.
+    fn index(&self, index: StringId) -> &Self::Output {
+        let interner = &self.strings;
+        &interner[index]
+    }
+}
+
+impl NodeId {
+    /// Wrap a value that is already known to be non-zero.
+    pub fn new_infallible(value: NonZero<usize>) -> NodeId {
+        Self(value)
+    }
+
+    /// Wrap `value`, returning `None` when it is zero.
+    pub fn new(value: usize) -> Option<NodeId> {
+        let non_zero = NonZero::new(value)?;
+        Some(Self(non_zero))
+    }
+}
+
+/// Identifier of an interned string: a newtype around the `lasso::Spur` key handed out
+/// by the interner.
+#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
+struct StringId(lasso::Spur);
+
+impl ops::Index<StringId> for lasso::Rodeo {
+    type Output = str;
+
+    /// Unwrap the newtype and defer to the interner's own lookup by `Spur`.
+    fn index(&self, index: StringId) -> &Self::Output {
+        let StringId(spur) = index;
+        &self[spur]
+    }
+}
+
+/// One node of a Qube: a key, the values it takes, and links to its parent and children.
+#[derive(Debug)]
+pub(crate) struct Node {
+ // Interned id of the node's key (the root node uses the key "root").
+ pub key: StringId,
+ // NOTE(review): nothing in this file fills `metadata`; every constructor shown here
+ // starts it empty - confirm its intended contents.
+ pub metadata: HashMap>,
+ pub parent: Option, // If not present, it's the root node
+ // Interned ids of the node's values, in the order they were supplied.
+ pub values: Vec,
+ // Child node ids grouped by the child's key id.
+ pub children: HashMap>,
+}
+
+impl Node {
+    /// Build the root node: key "root", no parent, no values and no children.
+    fn new_root(q: &mut Qube) -> Node {
+        let key = q.get_or_intern("root");
+        Node {
+            key,
+            metadata: HashMap::new(),
+            parent: None,
+            values: Vec::new(),
+            children: HashMap::new(),
+        }
+    }
+
+    /// Iterate over the ids of all children, across every key group.
+    fn children(&self) -> impl Iterator<Item = &NodeId> {
+        self.children.values().flat_map(|group| group.iter())
+    }
+
+    /// A node is the root exactly when it has no parent.
+    fn is_root(&self) -> bool {
+        self.parent.is_none()
+    }
+
+    /// Children are stored grouped by key, so counting them means visiting every group.
+    /// This is a shortcut for the special case of checking for exactly one child.
+    /// Returns `Some(NodeId)` of that child if there is exactly one, otherwise `None`.
+    fn has_exactly_one_child(&self) -> Option<NodeId> {
+        let mut groups = self.children.values();
+        // Exactly one key group, and that group holds exactly one id.
+        match (groups.next(), groups.next()) {
+            (Some(group), None) => match group.as_slice() {
+                [only] => Some(*only),
+                _ => None,
+            },
+            _ => None,
+        }
+    }
+
+    /// Total number of children, summed over every key group.
+    fn n_children(&self) -> usize {
+        self.children.values().map(Vec::len).sum()
+    }
+}
+
+/// Arena-style tree: all nodes live in one vector and refer to each other by `NodeId`.
+#[derive(Debug)]
+#[pyclass(subclass, dict)]
+pub struct Qube {
+ // Id of the root node; `Qube::new` sets it to 1, the first slot of `nodes`.
+ pub root: NodeId,
+ // Node storage; a `NodeId` holding n refers to index n - 1.
+ nodes: Vec,
+ // Interner holding every key and value string referenced by the nodes.
+ strings: Rodeo,
+}
+
+impl Qube {
+    /// Create a Qube that contains only the root node, which receives id 1.
+    pub fn new() -> Self {
+        let mut qube = Self {
+            root: NodeId::new(1).unwrap(),
+            nodes: Vec::new(),
+            strings: Rodeo::default(),
+        };
+
+        // The root is built after the Qube exists because its key is interned in `strings`.
+        let root_node = Node::new_root(&mut qube);
+        qube.nodes.push(root_node);
+        qube
+    }
+
+    /// Intern `val` (reusing the existing entry if present) and wrap the resulting key.
+    fn get_or_intern(&mut self, val: &str) -> StringId {
+        let spur = self.strings.get_or_intern(val);
+        StringId(spur)
+    }
+
+    /// Append a new node with the given key and values underneath `parent`
+    /// and return the id assigned to it.
+    pub fn add_node(&mut self, parent: NodeId, key: &str, values: &[&str]) -> NodeId {
+        let key_id = self.get_or_intern(key);
+        let mut value_ids = Vec::with_capacity(values.len());
+        for val in values {
+            value_ids.push(self.get_or_intern(val));
+        }
+
+        // Store the node in the arena; ids are one-based, so the new length is its id.
+        self.nodes.push(Node {
+            key: key_id,
+            metadata: HashMap::new(),
+            values: value_ids,
+            parent: Some(parent),
+            children: HashMap::new(),
+        });
+        let node_id = NodeId::new(self.nodes.len()).unwrap();
+
+        // Register the new id in the parent's group of children that share this key.
+        self[parent].children.entry(key_id).or_default().push(node_id);
+
+        node_id
+    }
+
+    /// Summarise a single node, defaulting to the root when no id is given.
+    fn print(&self, node_id: Option<NodeId>) -> String {
+        let target = node_id.unwrap_or(self.root);
+        self[target].summary(self)
+    }
+}
#[pymodule]
fn rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
- m.add_class::()?;
- m.add_function(wrap_pyfunction!(json::parse_qube, m)?);
+ m.add_class::()?;
Ok(())
}
diff --git a/src/rust/python_interface.rs b/src/rust/python_interface.rs
new file mode 100644
index 0000000..53a7c46
--- /dev/null
+++ b/src/rust/python_interface.rs
@@ -0,0 +1,105 @@
+use crate::{Node, NodeId, Qube};
+use pyo3::prelude::*;
+use pyo3::types::PyList;
+use std::ops::Deref;
+
+use crate::serialisation;
+
+/// A reference to a particular node in a Qube
+#[pyclass]
+pub struct NodeRef {
+ // Id of the referenced node inside `qube`.
+ id: NodeId,
+ // Python-side handle to the Qube that holds the node; it must be bound to a `Python`
+ // token before the Qube can be borrowed.
+ qube: Py, // see https://pyo3.rs/v0.23.1/types for a discussion of Py and Bound<'py, T>
+}
+
+#[pymethods]
+impl NodeRef {
+    /// Recursive representation of the form "Node(key, child, child, ...)".
+    fn __repr__(&self, py: Python) -> PyResult<String> {
+        fn render(node_id: NodeId, qube: &Qube) -> String {
+            let node = &qube[node_id];
+            let rendered_children: Vec<String> = node
+                .children
+                .values()
+                .flatten()
+                .map(|child_id| render(*child_id, qube))
+                .collect();
+
+            format!(
+                "Node({}, {})",
+                &qube[node.key],
+                rendered_children.join(", ")
+            )
+        }
+
+        // Bind the stored handle to the GIL and borrow the Qube for the whole walk.
+        let qube = self.qube.bind(py).borrow();
+        Ok(render(self.id, &qube))
+    }
+
+    /// Short representation: just the node's key, as "Node(key)".
+    fn __str__(&self, py: Python) -> String {
+        let qube = self.qube.bind(py).borrow();
+        let key_id = qube[self.id].key;
+        format!("Node({})", &qube.strings[key_id])
+    }
+
+    /// One `NodeRef` per child of this node, each sharing the same Qube handle.
+    #[getter]
+    pub fn get_children(&self, py: Python) -> Vec<NodeRef> {
+        let qube = self.qube.bind(py).borrow();
+        let mut refs = Vec::new();
+        for child_id in qube[self.id].children.values().flatten() {
+            refs.push(NodeRef {
+                id: *child_id,
+                qube: self.qube.clone_ref(py),
+            });
+        }
+        refs
+    }
+}
+
+#[pymethods]
+impl Qube {
+    /// Python constructor: an empty Qube holding only its root node.
+    #[new]
+    pub fn py_new() -> Self {
+        Self::new()
+    }
+
+    /// A `NodeRef` pointing at the root node of this Qube.
+    #[getter]
+    fn get_root(slf: Bound<'_, Self>) -> PyResult<NodeRef> {
+        let id = slf.borrow().root;
+        Ok(NodeRef {
+            id,
+            qube: slf.unbind(),
+        })
+    }
+
+    /// Both `repr()` and `str()` show the plain-text tree.
+    fn __repr__(&self) -> String {
+        self.string_tree()
+    }
+
+    fn __str__(&self) -> String {
+        self.string_tree()
+    }
+
+    /// Rich-display hook: returns the output of `html_tree`.
+    fn _repr_html_(&self) -> String {
+        self.html_tree()
+    }
+
+    /// Exposed to Python as `print`; summarises the root node only.
+    #[pyo3(name = "print")]
+    fn py_print(&self) -> String {
+        self.print(None)
+    }
+
+    /// The children of the root node, as `NodeRef`s.
+    #[getter]
+    pub fn get_children(slf: Bound<'_, Self>, py: Python) -> PyResult<Vec<NodeRef>> {
+        let id = slf.borrow().root;
+        let root = NodeRef {
+            id,
+            qube: slf.unbind(),
+        };
+        Ok(root.get_children(py))
+    }
+
+    /// Build a Qube from a JSON string by delegating to `serialisation::from_json`.
+    #[staticmethod]
+    pub fn from_json(data: &str) -> Result<Qube, serialisation::JSONError> {
+        serialisation::from_json(data)
+    }
+}
diff --git a/src/rust/qube.rs b/src/rust/qube.rs
deleted file mode 100644
index aa7817a..0000000
--- a/src/rust/qube.rs
+++ /dev/null
@@ -1,205 +0,0 @@
-use std::collections::HashMap;
-use std::hash::Hash;
-
-use lasso::{Rodeo, Spur};
-use pyo3::prelude::*;
-use pyo3::types::PyList;
-use std::num::NonZero;
-use std::ops;
-use std::sync::Arc;
-
-// This data structure uses the Newtype Index Pattern
-// See https://matklad.github.io/2018/06/04/newtype-index-pattern.html
-// See also https://github.com/nrc/r4cppp/blob/master/graphs/README.md#rcrefcellnode for a discussion of other approaches to trees and graphs in rust.
-// https://smallcultfollowing.com/babysteps/blog/2015/04/06/modeling-graphs-in-rust-using-vector-indices/
-
-// Index types use struct Id(NonZero)
-// This reserves 0 as a special value which allows Option)> to be the same size as usize.
-
-#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
-pub(crate) struct NodeId(NonZero);
-
-// Allow node indices to index directly into Qubes:
-impl ops::Index for Qube {
- type Output = Node;
-
- fn index(&self, index: NodeId) -> &Node {
- &self.nodes[index.0.get() - 1]
- }
-}
-
-impl ops::IndexMut for Qube {
- fn index_mut(&mut self, index: NodeId) -> &mut Node {
- &mut self.nodes[index.0.get() - 1]
- }
-}
-
-impl NodeId {
- pub fn new_infallible(value: NonZero) -> NodeId {
- NodeId(value)
- }
- pub fn new(value: usize) -> Option {
- NonZero::new(value).map(NodeId)
- }
-}
-
-#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
-struct StringId(lasso::Spur);
-
-impl ops::Index for lasso::Rodeo {
- type Output = str;
-
- fn index(&self, index: StringId) -> &str {
- &self[index.0]
- }
-}
-
-#[derive(Debug)]
-pub(crate) struct Node {
- key: StringId,
- metadata: HashMap>,
- parent: Option, // If not present, it's the root node
- values: Vec,
- children: HashMap>,
-}
-
-#[pyclass]
-pub struct NodeRef {
- id: NodeId,
- qube: Py,
-}
-
-#[pymethods]
-impl NodeRef {
- fn __repr__(&self, py: Python) -> PyResult {
- let qube = self.qube.bind(py).borrow();
- let node = &qube[self.id];
- let key = &qube.strings[node.key];
- let children = self
- .get_children(py)
- .iter()
- .map(|child| child.__repr__(py))
- .collect::, _>>()?
- .join(", ");
-
- Ok(format!("Node({}, {})", key, children))
- }
-
- fn __str__(&self, py: Python) -> String {
- let qube = self.qube.bind(py).borrow();
- let node = &qube[self.id];
- let key = &qube.strings[node.key];
- format!("Node({})", key)
- }
-
- #[getter]
- pub fn get_children(&self, py: Python) -> Vec {
- let qube = self.qube.bind(py).borrow();
- let node = &qube[self.id];
- node.children
- .values()
- .flatten()
- .map(|child_id| NodeRef {
- id: *child_id,
- qube: self.qube.clone_ref(py),
- })
- .collect()
- }
-}
-
-impl Node {
- fn new_root(q: &mut Qube) -> Node {
- Node {
- key: q.get_or_intern("root"),
- metadata: HashMap::new(),
- parent: None,
- values: vec![],
- children: HashMap::new(),
- }
- }
-
- fn children(&self) -> impl Iterator
- {
- self.children.values().flatten()
- }
-}
-
-#[derive(Debug)]
-#[pyclass]
-pub struct Qube {
- pub root: NodeId,
- nodes: Vec,
- strings: Rodeo,
-}
-
-impl Qube {
- fn get_or_intern(&mut self, val: &str) -> StringId {
- StringId(self.strings.get_or_intern(val))
- }
-
- pub fn add_node(&mut self, parent: NodeId, key: &str, values: &[&str]) -> NodeId {
- let key_id = self.get_or_intern(key);
- let values = values.iter().map(|val| self.get_or_intern(val)).collect();
-
- // Create the node object
- let node = Node {
- key: key_id,
- metadata: HashMap::new(),
- values: values,
- parent: Some(parent),
- children: HashMap::new(),
- };
-
- // Insert it into the Qube arena and determine its id
- self.nodes.push(node);
- let node_id = NodeId::new(self.nodes.len()).unwrap();
-
- // Add a reference to this node's id to the parents list of children.
- let parent_node = &mut self[parent];
- let key_group = parent_node.children.entry(key_id).or_insert(Vec::new());
- key_group.push(node_id);
-
- node_id
- }
-}
-
-#[pymethods]
-impl Qube {
- #[new]
- pub fn new() -> Self {
- let mut q = Qube {
- root: NodeId::new(1).unwrap(),
- nodes: Vec::new(),
- strings: Rodeo::default(),
- };
-
- let root = Node::new_root(&mut q);
- q.nodes.push(root);
- q
- }
-
- #[getter]
- fn get_root<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult {
- Ok(NodeRef {
- id: slf.root,
- qube: slf.into(),
- })
- }
-
- fn __repr__(&self) -> String {
- format!("{:?}", &self)
- }
-
- fn __str__<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> String {
- format!("Qube()")
- }
-
- #[getter]
- pub fn get_children<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult> {
- let root = NodeRef {
- id: slf.root,
- // `into_py` clones the existing Python handle; no new Qube object is allocated.
- qube: slf.into(),
- };
- Ok(root.get_children(py))
- }
-}
diff --git a/src/rust/json.rs b/src/rust/serialisation/json.rs
similarity index 64%
rename from src/rust/json.rs
rename to src/rust/serialisation/json.rs
index 2520646..bb4c94e 100644
--- a/src/rust/json.rs
+++ b/src/rust/serialisation/json.rs
@@ -1,9 +1,27 @@
+use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use serde::{Deserialize, Serialize};
-use serde_json::{Result, Value};
+use serde_json::Value;
use std::collections::HashMap;
-use crate::qube::{Node, NodeId, Qube};
+use crate::{Node, NodeId, Qube};
+
+// Newtype wrapper around serde_json::Error.
+// Neither serde_json::Error nor PyErr is defined in this crate, so the automatic
+// conversion to PyErr is implemented on this local wrapper instead.
+// see https://pyo3.rs/main/function/error-handling.html#foreign-rust-error-types
+pub struct JSONError(serde_json::Error);
+
+impl From<JSONError> for PyErr {
+    /// Surface a JSON error to Python as a `ValueError` carrying the serde_json message.
+    fn from(error: JSONError) -> Self {
+        let message = error.0.to_string();
+        PyValueError::new_err(message)
+    }
+}
+
+impl From for JSONError {
+ fn from(other: serde_json::Error) -> Self {
+ Self(other)
+ }
+}
#[derive(Serialize, Deserialize, Debug)]
#[serde(untagged)]
@@ -37,10 +55,7 @@ fn add_nodes(qube: &mut Qube, parent: NodeId, nodes: &[JSONQube]) -> Vec
.collect()
}
-#[pyfunction]
-pub fn parse_qube() -> PyResult {
- let data = r#"{"key": "root", "values": ["root"], "metadata": {}, "children": [{"key": "frequency", "values": "*", "metadata": {}, "children": [{"key": "levtype", "values": "*", "metadata": {}, "children": [{"key": "param", "values": "*", "metadata": {}, "children": [{"key": "levelist", "values": "*", "metadata": {}, "children": [{"key": "domain", "values": ["a", "b", "c", "d"], "metadata": {}, "children": []}]}]}]}]}]}"#;
-
+pub fn from_json(data: &str) -> Result {
// Parse the string of data into serde_json::Value.
let json_qube: JSONQube = serde_json::from_str(data).expect("JSON parsing failed");
diff --git a/src/rust/serialisation/mod.rs b/src/rust/serialisation/mod.rs
new file mode 100644
index 0000000..881d8f7
--- /dev/null
+++ b/src/rust/serialisation/mod.rs
@@ -0,0 +1,2 @@
+mod json;
+pub use json::{from_json, JSONError};
diff --git a/test_scripts/rust.py b/test_scripts/rust.py
index 418a75f..5d84567 100644
--- a/test_scripts/rust.py
+++ b/test_scripts/rust.py
@@ -1,13 +1,21 @@
-from qubed.rust import Qube, parse_qube
+from __future__ import annotations
-q = Qube()
-print(q)
+import json
-print(f"repr: {q.root!r} str: {q.root}")
+from qubed import Qube as pyQube
+from qubed.rust import Qube as Qube
-q = parse_qube()
-print(repr(q))
+q = pyQube.from_tree("""
+root, class=d1
+├── dataset=another-value, generation=1/2/3
+└── dataset=climate-dt/weather-dt, generation=1/2/3/4
+""")
+json_str = json.dumps(q.to_json())
+rust_qube = Qube.from_json(json_str)
+print(repr(rust_qube))
-r = q.root
-
-print(f"{q.root = }, {q.children = }")
+expected = """root, class=d1
+├── dataset=another-value, generation=1/2/3
+└── dataset=climate-dt/weather-dt, generation=1/2/3/4"""
+assert repr(rust_qube) == expected
+# print(rs_qube._repr_html_())
diff --git a/tests/test_rust.py b/tests/test_rust.py
index e69de29..6f5fc5a 100644
--- a/tests/test_rust.py
+++ b/tests/test_rust.py
@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+import json
+
+from qubed import Qube as pyQube
+from qubed.rust import Qube as Qube
+
+q = pyQube.from_tree("""
+root, class=d1
+├── dataset=another-value, generation=1/2/3
+└── dataset=climate-dt/weather-dt, generation=1/2/3/4
+""")
+json_str = json.dumps(q.to_json())
+rust_qube = Qube.from_json(json_str)
+print(repr(rust_qube))
+
+expected = """root, class=d1
+├── dataset=another-value, generation=1/2/3
+└── dataset=climate-dt/weather-dt, generation=1/2/3/4
+"""
+assert repr(rust_qube) == expected