diff --git a/Cargo.toml b/Cargo.toml index 3d16686..1af4769 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" pyo3 = "0.25" lasso = "0.7.3" +itertools = "0.14.0" [package.metadata.maturin] version-from-git = true diff --git a/src/rust/fdb.rs b/src/rust/connectors/fdb.rs similarity index 100% rename from src/rust/fdb.rs rename to src/rust/connectors/fdb.rs diff --git a/src/rust/formatters/mod.rs b/src/rust/formatters/mod.rs new file mode 100644 index 0000000..8dbbdd2 --- /dev/null +++ b/src/rust/formatters/mod.rs @@ -0,0 +1,147 @@ +use crate::{Node, NodeId, Qube}; +use itertools::Itertools; +use itertools::Position; + +impl Node { + /// Generate a human readable summary of the node + /// Examples include: key=value1/value2/.../valueN, key=value1/to/value1, key=*, root etc + pub fn summary(&self, qube: &Qube) -> String { + if self.is_root() { + return "root".to_string(); + } + let key = &qube[self.key]; + let values: String = + Itertools::intersperse(self.values.iter().map(|id| &qube[*id]), "/").collect(); + + format!("{}={}", key, values) + } + + pub fn html_summary(&self, qube: &Qube) -> String { + if self.is_root() { + return r#"root"#.to_string(); + } + let key = &qube[self.key]; + let values: String = + Itertools::intersperse(self.values.iter().map(|id| &qube[*id]), "/").collect(); + + let summary = format!("{}={}", key, values); + let path = summary.clone(); + let info = format!("is_root: {}", self.is_root()); + format!(r#"{summary}"#) + } +} + +struct NodeSummary { + summary: String, + end: NodeId, +} + +enum SummaryType { + PlainText, + HTML, +} + +/// Given a Node, traverse the tree until a node has more than one child. 
+/// Returns a summary of the form "key1=v1/v2, key2=v1/v2/v3, key3=v1" +/// and the id of the last node in the summary +fn summarise_nodes(qube: &Qube, node_id: &NodeId, summary_type: SummaryType) -> NodeSummary { + let mut node_id = *node_id; + let mut summary_vec = vec![]; + loop { + let node = &qube[node_id]; + let summary = match summary_type { + SummaryType::PlainText => node.summary(&qube), + SummaryType::HTML => node.html_summary(&qube), + }; + summary_vec.push(summary); + + // Bail out if the node has anything other than 1 child. + match node.has_exactly_one_child() { + Some(n) => node_id = n, + None => break, + }; + } + NodeSummary { + summary: summary_vec.join(", "), + end: node_id, + } +} + +fn qube_to_tree(qube: &Qube, node_id: &NodeId, prefix: &str, depth: usize) -> String { + let NodeSummary { + summary, + end: node_id, + } = summarise_nodes(qube, node_id, SummaryType::PlainText); + + let mut output: Vec = Vec::new(); + + if depth <= 0 { + return format!("{} - ...\n", summary); + } else { + output.push(format!("{}\n", summary)); + } + + let node = &qube[node_id]; + for (position, child_id) in node.children().with_position() { + let (connector, extension) = match position { + Position::Last | Position::Only => ("└── ", " "), + _ => ("├── ", "│ "), + }; + output.extend([ + prefix.to_string(), + connector.to_string(), + qube_to_tree(qube, child_id, &format!("{prefix}{extension}"), depth - 1), + ]); + } + + output.join("") +} + +fn qube_to_html(qube: &Qube, node_id: &NodeId, prefix: &str, depth: usize) -> String { + let NodeSummary { + summary, + end: node_id, + } = summarise_nodes(qube, node_id, SummaryType::PlainText); + + let node = &qube[node_id]; + let mut output: Vec = Vec::new(); + + let open = if depth > 0 { "open" } else { "" }; + output.push(format!( + r#"
{summary}"# + )); + + for (position, child_id) in node.children().with_position() { + let (connector, extension) = match position { + Position::Last | Position::Only => ("└── ", " "), + _ => ("├── ", "│ "), + }; + output.extend([ + prefix.to_string(), + connector.to_string(), + qube_to_tree(qube, child_id, &format!("{prefix}{extension}"), depth - 1), + ]); + } + + output.join("") +} + +impl Qube { + /// Return a string version of the Qube in the format + /// root + /// ├── class=od, expver=0001/0002, param=1/2 + /// └── class=rd, param=1/2/3 + pub fn string_tree(&self) -> String { + qube_to_tree(&self, &self.root, "", 5) + } + + /// Return an HTML version of the Qube which renders like this + /// root + /// ├── class=od, expver=0001/0002, param=1/2 + /// └── class=rd, param=1/2/3 + /// But under the hood children are represented with a details/summary tag and each key=value is a span + /// CSS and JS functionality is bundled inside. + pub fn html_tree(&self) -> String { + qube_to_html(&self, &self.root, "", 5) + } +} diff --git a/src/rust/lib.rs b/src/rust/lib.rs index f6eec70..e09b95d 100644 --- a/src/rust/lib.rs +++ b/src/rust/lib.rs @@ -3,14 +3,180 @@ use pyo3::prelude::*; use pyo3::wrap_pyfunction; use pyo3::types::{PyDict, PyInt, PyList, PyString}; +use std::collections::HashMap; +use pyo3::prelude::*; +use std::hash::Hash; -mod qube; -mod json; + +use lasso::{Rodeo, Spur}; +use std::num::NonZero; +use std::ops; + +mod serialisation; +mod python_interface; +mod formatters; + +// This data structure uses the Newtype Index Pattern +// See https://matklad.github.io/2018/06/04/newtype-index-pattern.html +// See also https://github.com/nrc/r4cppp/blob/master/graphs/README.md#rcrefcellnode for a discussion of other approaches to trees and graphs in rust. 
+// https://smallcultfollowing.com/babysteps/blog/2015/04/06/modeling-graphs-in-rust-using-vector-indices/ + +// Index types use struct Id(NonZero) +// This reserves 0 as a special value which allows Option)> to be the same size as usize. + +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)] +pub(crate) struct NodeId(NonZero); + +// Allow node indices to index directly into Qubes: +impl ops::Index for Qube { + type Output = Node; + + fn index(&self, index: NodeId) -> &Node { + &self.nodes[index.0.get() - 1] + } +} + +impl ops::IndexMut for Qube { + fn index_mut(&mut self, index: NodeId) -> &mut Node { + &mut self.nodes[index.0.get() - 1] + } +} + +impl ops::Index for Qube { + type Output = str; + + fn index(&self, index: StringId) -> &str { + &self.strings[index] + } +} + +impl NodeId { + pub fn new_infallible(value: NonZero) -> NodeId { + NodeId(value) + } + pub fn new(value: usize) -> Option { + NonZero::new(value).map(NodeId) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)] +struct StringId(lasso::Spur); + +impl ops::Index for lasso::Rodeo { + type Output = str; + + fn index(&self, index: StringId) -> &str { + &self[index.0] + } +} + +#[derive(Debug)] +pub(crate) struct Node { + pub key: StringId, + pub metadata: HashMap>, + pub parent: Option, // If not present, it's the root node + pub values: Vec, + pub children: HashMap>, +} + +impl Node { + fn new_root(q: &mut Qube) -> Node { + Node { + key: q.get_or_intern("root"), + metadata: HashMap::new(), + parent: None, + values: vec![], + children: HashMap::new(), + } + } + + fn children(&self) -> impl Iterator { + self.children.values().flatten() + } + + fn is_root(&self) -> bool { + self.parent.is_none() + } + + /// Because children are stored grouped by key + /// determining the number of children quickly takes a little effort. + /// This is a fast method for the special case of checking if a Node has exactly one child. 
+ /// Returns Some(NodeId) if there is exactly one child, else None + fn has_exactly_one_child(&self) -> Option { + if self.children.len() != 1 {return None} + let Some(value_group) = self.children.values().next() else {return None}; + let [node_id] = &value_group.as_slice() else {return None}; + Some(*node_id) + } + + fn n_children(&self) -> usize { + self.children + .values() + .map(|v| v.len()) + .sum() + } +} + +#[derive(Debug)] +#[pyclass(subclass, dict)] +pub struct Qube { + pub root: NodeId, + nodes: Vec, + strings: Rodeo, +} + +impl Qube { + pub fn new() -> Self { + let mut q = Self { + root: NodeId::new(1).unwrap(), + nodes: Vec::new(), + strings: Rodeo::default(), + }; + + let root = Node::new_root(&mut q); + q.nodes.push(root); + q + } + + fn get_or_intern(&mut self, val: &str) -> StringId { + StringId(self.strings.get_or_intern(val)) + } + + pub fn add_node(&mut self, parent: NodeId, key: &str, values: &[&str]) -> NodeId { + let key_id = self.get_or_intern(key); + let values = values.iter().map(|val| self.get_or_intern(val)).collect(); + + // Create the node object + let node = Node { + key: key_id, + metadata: HashMap::new(), + values: values, + parent: Some(parent), + children: HashMap::new(), + }; + + // Insert it into the Qube arena and determine its id + self.nodes.push(node); + let node_id = NodeId::new(self.nodes.len()).unwrap(); + + // Add a reference to this node's id to the parent's list of children. 
+ let parent_node = &mut self[parent]; + let key_group = parent_node.children.entry(key_id).or_insert(Vec::new()); + key_group.push(node_id); + + node_id + } + + fn print(&self, node_id: Option) -> String { + let node_id: NodeId = node_id.unwrap_or(self.root); + let node = &self[node_id]; + node.summary(&self) + } +} #[pymodule] fn rust(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - m.add_function(wrap_pyfunction!(json::parse_qube, m)?); + m.add_class::()?; Ok(()) } diff --git a/src/rust/python_interface.rs b/src/rust/python_interface.rs new file mode 100644 index 0000000..53a7c46 --- /dev/null +++ b/src/rust/python_interface.rs @@ -0,0 +1,105 @@ +use crate::{Node, NodeId, Qube}; +use pyo3::prelude::*; +use pyo3::types::PyList; +use std::ops::Deref; + +use crate::serialisation; + +/// A reference to a particular node in a Qube +#[pyclass] +pub struct NodeRef { + id: NodeId, + qube: Py, // see https://pyo3.rs/v0.23.1/types for a discussion of Py and Bound<'py, T> +} + +#[pymethods] +impl NodeRef { + fn __repr__(&self, py: Python) -> PyResult { + // Get the Py reference, bind it to the GIL. 
+ let qube = self.qube.bind(py); + + fn repr_helper<'py>(node_id: NodeId, qube: &Bound<'py, Qube>) -> String { + let node = &qube.borrow()[node_id]; + let key = &qube.borrow()[node.key]; + let children = node + .children + .values() + .flatten() + .map(|child_id| repr_helper(child_id.clone(), qube)) + .collect::>() + .join(", "); + + format!("Node({}, {})", key, children) + } + + Ok(repr_helper(self.id, qube)) + } + + fn __str__(&self, py: Python) -> String { + let qube = self.qube.bind(py).borrow(); + let node = &qube[self.id]; + let key = &qube.strings[node.key]; + format!("Node({})", key) + } + + #[getter] + pub fn get_children(&self, py: Python) -> Vec { + let qube = self.qube.bind(py).borrow(); + let node = &qube[self.id]; + node.children + .values() + .flatten() + .map(|child_id| NodeRef { + id: *child_id, + qube: self.qube.clone_ref(py), + }) + .collect() + } +} + +#[pymethods] +impl Qube { + #[new] + pub fn py_new() -> Self { + Qube::new() + } + + #[getter] + fn get_root(slf: Bound<'_, Self>) -> PyResult { + Ok(NodeRef { + id: slf.borrow().root, + qube: slf.unbind(), + }) + } + + fn __repr__(&self) -> String { + self.string_tree() + } + + fn __str__<'py>(&self) -> String { + self.string_tree() + } + + fn _repr_html_(&self) -> String { + self.html_tree() + } + + #[pyo3(name = "print")] + fn py_print(&self) -> String { + self.print(Option::None) + } + + #[getter] + pub fn get_children(slf: Bound<'_, Self>, py: Python) -> PyResult> { + let root = NodeRef { + id: slf.borrow().root, + qube: slf.unbind(), + }; + Ok(root.get_children(py)) + } + + #[staticmethod] + pub fn from_json(data: &str) -> Result { + serialisation::from_json(data) + } +} diff --git a/src/rust/qube.rs b/src/rust/qube.rs deleted file mode 100644 index aa7817a..0000000 --- a/src/rust/qube.rs +++ /dev/null @@ -1,205 +0,0 @@ -use std::collections::HashMap; -use std::hash::Hash; - -use lasso::{Rodeo, Spur}; -use pyo3::prelude::*; -use pyo3::types::PyList; -use std::num::NonZero; -use std::ops; 
-use std::sync::Arc; - -// This data structure uses the Newtype Index Pattern -// See https://matklad.github.io/2018/06/04/newtype-index-pattern.html -// See also https://github.com/nrc/r4cppp/blob/master/graphs/README.md#rcrefcellnode for a discussion of other approaches to trees and graphs in rust. -// https://smallcultfollowing.com/babysteps/blog/2015/04/06/modeling-graphs-in-rust-using-vector-indices/ - -// Index types use struct Id(NonZero) -// This reserves 0 as a special value which allows Option)> to be the same size as usize. - -#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)] -pub(crate) struct NodeId(NonZero); - -// Allow node indices to index directly into Qubes: -impl ops::Index for Qube { - type Output = Node; - - fn index(&self, index: NodeId) -> &Node { - &self.nodes[index.0.get() - 1] - } -} - -impl ops::IndexMut for Qube { - fn index_mut(&mut self, index: NodeId) -> &mut Node { - &mut self.nodes[index.0.get() - 1] - } -} - -impl NodeId { - pub fn new_infallible(value: NonZero) -> NodeId { - NodeId(value) - } - pub fn new(value: usize) -> Option { - NonZero::new(value).map(NodeId) - } -} - -#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)] -struct StringId(lasso::Spur); - -impl ops::Index for lasso::Rodeo { - type Output = str; - - fn index(&self, index: StringId) -> &str { - &self[index.0] - } -} - -#[derive(Debug)] -pub(crate) struct Node { - key: StringId, - metadata: HashMap>, - parent: Option, // If not present, it's the root node - values: Vec, - children: HashMap>, -} - -#[pyclass] -pub struct NodeRef { - id: NodeId, - qube: Py, -} - -#[pymethods] -impl NodeRef { - fn __repr__(&self, py: Python) -> PyResult { - let qube = self.qube.bind(py).borrow(); - let node = &qube[self.id]; - let key = &qube.strings[node.key]; - let children = self - .get_children(py) - .iter() - .map(|child| child.__repr__(py)) - .collect::, _>>()? 
- .join(", "); - - Ok(format!("Node({}, {})", key, children)) - } - - fn __str__(&self, py: Python) -> String { - let qube = self.qube.bind(py).borrow(); - let node = &qube[self.id]; - let key = &qube.strings[node.key]; - format!("Node({})", key) - } - - #[getter] - pub fn get_children(&self, py: Python) -> Vec { - let qube = self.qube.bind(py).borrow(); - let node = &qube[self.id]; - node.children - .values() - .flatten() - .map(|child_id| NodeRef { - id: *child_id, - qube: self.qube.clone_ref(py), - }) - .collect() - } -} - -impl Node { - fn new_root(q: &mut Qube) -> Node { - Node { - key: q.get_or_intern("root"), - metadata: HashMap::new(), - parent: None, - values: vec![], - children: HashMap::new(), - } - } - - fn children(&self) -> impl Iterator { - self.children.values().flatten() - } -} - -#[derive(Debug)] -#[pyclass] -pub struct Qube { - pub root: NodeId, - nodes: Vec, - strings: Rodeo, -} - -impl Qube { - fn get_or_intern(&mut self, val: &str) -> StringId { - StringId(self.strings.get_or_intern(val)) - } - - pub fn add_node(&mut self, parent: NodeId, key: &str, values: &[&str]) -> NodeId { - let key_id = self.get_or_intern(key); - let values = values.iter().map(|val| self.get_or_intern(val)).collect(); - - // Create the node object - let node = Node { - key: key_id, - metadata: HashMap::new(), - values: values, - parent: Some(parent), - children: HashMap::new(), - }; - - // Insert it into the Qube arena and determine its id - self.nodes.push(node); - let node_id = NodeId::new(self.nodes.len()).unwrap(); - - // Add a reference to this node's id to the parents list of children. 
- let parent_node = &mut self[parent]; - let key_group = parent_node.children.entry(key_id).or_insert(Vec::new()); - key_group.push(node_id); - - node_id - } -} - -#[pymethods] -impl Qube { - #[new] - pub fn new() -> Self { - let mut q = Qube { - root: NodeId::new(1).unwrap(), - nodes: Vec::new(), - strings: Rodeo::default(), - }; - - let root = Node::new_root(&mut q); - q.nodes.push(root); - q - } - - #[getter] - fn get_root<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult { - Ok(NodeRef { - id: slf.root, - qube: slf.into(), - }) - } - - fn __repr__(&self) -> String { - format!("{:?}", &self) - } - - fn __str__<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> String { - format!("Qube()") - } - - #[getter] - pub fn get_children<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult> { - let root = NodeRef { - id: slf.root, - // `into_py` clones the existing Python handle; no new Qube object is allocated. - qube: slf.into(), - }; - Ok(root.get_children(py)) - } -} diff --git a/src/rust/json.rs b/src/rust/serialisation/json.rs similarity index 64% rename from src/rust/json.rs rename to src/rust/serialisation/json.rs index 2520646..bb4c94e 100644 --- a/src/rust/json.rs +++ b/src/rust/serialisation/json.rs @@ -1,9 +1,27 @@ +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use serde::{Deserialize, Serialize}; -use serde_json::{Result, Value}; +use serde_json::Value; use std::collections::HashMap; -use crate::qube::{Node, NodeId, Qube}; +use crate::{Node, NodeId, Qube}; + +// Use a newtype wrapper to allow us to implement auto conversion from serde_json::Error to PyErr +// via a wrapper intermediate +// see https://pyo3.rs/main/function/error-handling.html#foreign-rust-error-types +pub struct JSONError(serde_json::Error); + +impl From for PyErr { + fn from(error: JSONError) -> Self { + PyValueError::new_err(format!("{}", error.0)) + } +} + +impl From for JSONError { + fn from(other: serde_json::Error) -> Self { + Self(other) + } +} #[derive(Serialize, 
Deserialize, Debug)] #[serde(untagged)] @@ -37,10 +55,7 @@ fn add_nodes(qube: &mut Qube, parent: NodeId, nodes: &[JSONQube]) -> Vec .collect() } -#[pyfunction] -pub fn parse_qube() -> PyResult { - let data = r#"{"key": "root", "values": ["root"], "metadata": {}, "children": [{"key": "frequency", "values": "*", "metadata": {}, "children": [{"key": "levtype", "values": "*", "metadata": {}, "children": [{"key": "param", "values": "*", "metadata": {}, "children": [{"key": "levelist", "values": "*", "metadata": {}, "children": [{"key": "domain", "values": ["a", "b", "c", "d"], "metadata": {}, "children": []}]}]}]}]}]}"#; - +pub fn from_json(data: &str) -> Result { // Parse the string of data into serde_json::Value. let json_qube: JSONQube = serde_json::from_str(data).expect("JSON parsing failed"); diff --git a/src/rust/serialisation/mod.rs b/src/rust/serialisation/mod.rs new file mode 100644 index 0000000..881d8f7 --- /dev/null +++ b/src/rust/serialisation/mod.rs @@ -0,0 +1,2 @@ +mod json; +pub use json::{from_json, JSONError}; diff --git a/test_scripts/rust.py b/test_scripts/rust.py index 418a75f..5d84567 100644 --- a/test_scripts/rust.py +++ b/test_scripts/rust.py @@ -1,13 +1,21 @@ -from qubed.rust import Qube, parse_qube +from __future__ import annotations -q = Qube() -print(q) +import json -print(f"repr: {q.root!r} str: {q.root}") +from qubed import Qube as pyQube +from qubed.rust import Qube as Qube -q = parse_qube() -print(repr(q)) +q = pyQube.from_tree(""" +root, class=d1 +├── dataset=another-value, generation=1/2/3 +└── dataset=climate-dt/weather-dt, generation=1/2/3/4 +""") +json_str = json.dumps(q.to_json()) +rust_qube = Qube.from_json(json_str) +print(repr(rust_qube)) -r = q.root - -print(f"{q.root = }, {q.children = }") +expected = """root, class=d1 +├── dataset=another-value, generation=1/2/3 +└── dataset=climate-dt/weather-dt, generation=1/2/3/4""" +assert repr(rust_qube) == expected +# print(rs_qube._repr_html_()) diff --git a/tests/test_rust.py 
b/tests/test_rust.py index e69de29..6f5fc5a 100644 --- a/tests/test_rust.py +++ b/tests/test_rust.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import json + +from qubed import Qube as pyQube +from qubed.rust import Qube as Qube + +q = pyQube.from_tree(""" +root, class=d1 +├── dataset=another-value, generation=1/2/3 +└── dataset=climate-dt/weather-dt, generation=1/2/3/4 +""") +json_str = json.dumps(q.to_json()) +rust_qube = Qube.from_json(json_str) +print(repr(rust_qube)) + +expected = """root, class=d1 +├── dataset=another-value, generation=1/2/3 +└── dataset=climate-dt/weather-dt, generation=1/2/3/4 +""" +assert repr(rust_qube) == expected