flesh out rust implementation

This commit is contained in:
Tom 2025-05-22 14:40:44 +01:00
parent 959dac332d
commit 90ea736c43
10 changed files with 484 additions and 224 deletions

View File

@ -10,6 +10,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
pyo3 = "0.25"
lasso = "0.7.3"
itertools = "0.14.0"
[package.metadata.maturin]
version-from-git = true

147
src/rust/formatters/mod.rs Normal file
View File

@ -0,0 +1,147 @@
use crate::{Node, NodeId, Qube};
use itertools::Itertools;
use itertools::Position;
impl Node {
    /// Generate a human readable, plain-text summary of the node.
    ///
    /// The root node is rendered as `root`; every other node is rendered as
    /// `key=value1/value2/.../valueN`, with the key and values resolved through `qube`.
    pub fn summary(&self, qube: &Qube) -> String {
        if self.is_root() {
            return "root".to_string();
        }
        let key = &qube[self.key];
        let values: String =
            Itertools::intersperse(self.values.iter().map(|id| &qube[*id]), "/").collect();
        format!("{}={}", key, values)
    }

    /// Generate an HTML summary of the node.
    ///
    /// The node is wrapped in a `<span class="qubed-node">`. For non-root nodes the
    /// plain-text summary is used as the span text and as the `data-path` attribute,
    /// and the `title` attribute carries the `is_root` flag.
    pub fn html_summary(&self, qube: &Qube) -> String {
        if self.is_root() {
            return r#"<span class="qubed-node">root</span>"#.to_string();
        }
        // Keys and values are arbitrary strings, so they must be escaped before being
        // placed in element text or inside a double-quoted attribute value.
        let summary = escape_html(&self.summary(qube));
        let info = format!("is_root: {}", self.is_root());
        format!(
            r#"<span class="qubed-node" data-path="{summary}" title="{info}">{summary}</span>"#
        )
    }
}

/// Escape the characters that are significant in HTML text and in quoted attribute values.
fn escape_html(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Result of summarising a chain of nodes with `summarise_nodes`.
struct NodeSummary {
/// The summaries of every node visited, joined with ", ".
summary: String,
/// Id of the last node that was visited while building `summary`.
end: NodeId,
}
/// Selects which per-node formatter `summarise_nodes` uses.
enum SummaryType {
/// Use `Node::summary`.
PlainText,
/// Use `Node::html_summary`.
HTML,
}
/// Walk down the tree from `node_id` for as long as each node has exactly one child.
///
/// Returns the summaries of all visited nodes in the form
/// "key1=v1/v2, key2=v1/v2/v3, key3=v1" together with the id of the last node visited.
/// `summary_type` chooses between the plain-text and the HTML per-node formatter.
fn summarise_nodes(qube: &Qube, node_id: &NodeId, summary_type: SummaryType) -> NodeSummary {
    let mut current = *node_id;
    let mut parts: Vec<String> = Vec::new();
    loop {
        let node = &qube[current];
        parts.push(match summary_type {
            SummaryType::PlainText => node.summary(qube),
            SummaryType::HTML => node.html_summary(qube),
        });
        // Stop at the first node that has anything other than exactly one child.
        let Some(only_child) = node.has_exactly_one_child() else {
            break;
        };
        current = only_child;
    }
    NodeSummary {
        summary: parts.join(", "),
        end: current,
    }
}
/// Render the subtree starting at `node_id` as an indented plain-text tree.
///
/// Chains of single-child nodes are collapsed onto one line by `summarise_nodes`.
/// `prefix` is the text printed in front of every child line of this subtree, and
/// `depth` is the number of further levels to expand; once it reaches zero the
/// line is suffixed with " - ..." and the children are not rendered.
/// Every line of the result is terminated by a newline.
fn qube_to_tree(qube: &Qube, node_id: &NodeId, prefix: &str, depth: usize) -> String {
    let NodeSummary { summary, end } = summarise_nodes(qube, node_id, SummaryType::PlainText);
    if depth == 0 {
        return format!("{} - ...\n", summary);
    }

    let mut output = format!("{}\n", summary);
    for (position, child_id) in qube[end].children().with_position() {
        // The continuation must be exactly as wide as the connector (4 columns), and
        // non-final children need a vertical guide so that later siblings stay
        // visually attached to their parent.
        let (connector, extension) = match position {
            Position::Last | Position::Only => ("└── ", "    "),
            Position::First | Position::Middle => ("├── ", "│   "),
        };
        output.push_str(prefix);
        output.push_str(connector);
        output.push_str(&qube_to_tree(
            qube,
            child_id,
            &format!("{prefix}{extension}"),
            depth - 1, // cannot underflow: depth == 0 returned above
        ));
    }
    output
}
/// Render the subtree starting at `node_id` as nested `<details>`/`<summary>` elements.
///
/// Chains of single-child nodes are collapsed into one `<summary>` by `summarise_nodes`,
/// using the HTML per-node formatter so each `key=value` is its own span.
/// `prefix` is the text placed in front of the connectors of this subtree's children,
/// and `depth` is the number of levels rendered in the `open` state.
fn qube_to_html(qube: &Qube, node_id: &NodeId, prefix: &str, depth: usize) -> String {
    let mut html = String::new();
    push_html_level(qube, node_id, prefix, "", depth, &mut html);
    html
}

/// Append one `<details>` level (and, recursively, its children) to `html`.
///
/// `connector` is the already-prefixed branch text ("├── " / "└── ") shown in front of
/// this level's summary; it is empty for the top-most level.
fn push_html_level(
    qube: &Qube,
    node_id: &NodeId,
    prefix: &str,
    connector: &str,
    depth: usize,
    html: &mut String,
) {
    let NodeSummary { summary, end } = summarise_nodes(qube, node_id, SummaryType::HTML);
    let open = if depth > 0 { "open" } else { "" };
    html.push_str(&format!(
        r#"<details {open}><summary class="qubed-level">{connector}{summary}</summary>"#
    ));
    for (position, child_id) in qube[end].children().with_position() {
        let (branch, extension) = match position {
            Position::Last | Position::Only => ("└── ", "    "),
            Position::First | Position::Middle => ("├── ", "│   "),
        };
        push_html_level(
            qube,
            child_id,
            &format!("{prefix}{extension}"),
            &format!("{prefix}{branch}"),
            // Levels below the open depth stay collapsed; saturate instead of underflowing.
            depth.saturating_sub(1),
            html,
        );
    }
    // Every opened element is closed so that sibling levels do not nest into each other.
    html.push_str("</details>");
}
impl Qube {
    /// Render the Qube, starting at its root, as a plain-text tree such as
    /// root
    /// ├── class=od, expver=0001/0002, param=1/2
    /// └── class=rd, param=1/2/3
    ///
    /// The rendering starts with an empty prefix and a depth of 5.
    pub fn string_tree(&self) -> String {
        let root = self.root;
        qube_to_tree(self, &root, "", 5)
    }

    /// Render the Qube, starting at its root, through the HTML formatter.
    ///
    /// The rendering starts with an empty prefix and a depth of 5.
    pub fn html_tree(&self) -> String {
        let root = self.root;
        qube_to_html(self, &root, "", 5)
    }
}

View File

@ -3,14 +3,180 @@
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use pyo3::types::{PyDict, PyInt, PyList, PyString};
use std::collections::HashMap;
use pyo3::prelude::*;
use std::hash::Hash;
mod qube;
mod json;
use lasso::{Rodeo, Spur};
use std::num::NonZero;
use std::ops;
mod serialisation;
mod python_interface;
mod formatters;
// This data structure uses the Newtype Index Pattern
// See https://matklad.github.io/2018/06/04/newtype-index-pattern.html
// See also https://github.com/nrc/r4cppp/blob/master/graphs/README.md#rcrefcellnode for a discussion of other approaches to trees and graphs in rust.
// https://smallcultfollowing.com/babysteps/blog/2015/04/06/modeling-graphs-in-rust-using-vector-indices/
// Index types use struct Id(NonZero<usize>)
// This reserves 0 as a special value which allows Option<Id(NonZero<usize>)> to be the same size as usize.
/// Identifier of a [`Node`] stored inside a [`Qube`].
///
/// The wrapped value is 1-based: indexing a `Qube` with a `NodeId` subtracts one
/// to obtain the position in the node list.
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
pub(crate) struct NodeId(NonZero<usize>);
// Let a `NodeId` be used directly as an index into a `Qube`.
// Ids are 1-based, so they are shifted down by one to address the node list.
impl ops::Index<NodeId> for Qube {
    type Output = Node;

    /// Borrow the node identified by `id`; panics if the id is out of range.
    fn index(&self, id: NodeId) -> &Node {
        let slot = id.0.get() - 1;
        &self.nodes[slot]
    }
}

impl ops::IndexMut<NodeId> for Qube {
    /// Mutably borrow the node identified by `id`; panics if the id is out of range.
    fn index_mut(&mut self, id: NodeId) -> &mut Node {
        let slot = id.0.get() - 1;
        &mut self.nodes[slot]
    }
}
// Let a `StringId` be used directly as an index into a `Qube`,
// resolving it through the Qube's string interner.
impl ops::Index<StringId> for Qube {
    type Output = str;

    /// Look up the string identified by `id` in the Qube's interner.
    fn index(&self, id: StringId) -> &str {
        let interner = &self.strings;
        &interner[id]
    }
}
impl NodeId {
pub fn new_infallible(value: NonZero<usize>) -> NodeId {
NodeId(value)
}
pub fn new(value: usize) -> Option<NodeId> {
NonZero::new(value).map(NodeId)
}
}
/// Identifier of an interned string: a newtype around the `lasso::Spur` key
/// returned by the interner.
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
struct StringId(lasso::Spur);
impl ops::Index<StringId> for lasso::Rodeo {
type Output = str;
fn index(&self, index: StringId) -> &str {
&self[index.0]
}
}
/// A single node of the tree held by a [`Qube`].
#[derive(Debug)]
pub(crate) struct Node {
/// Interned id of this node's key.
pub key: StringId,
/// Metadata keyed by interned string id; nodes built in this file start with an empty map.
pub metadata: HashMap<StringId, Vec<String>>,
pub parent: Option<NodeId>, // If not present, it's the root node
/// Interned ids of the values this node holds for its key.
pub values: Vec<StringId>,
/// Ids of the child nodes, grouped by the children's key.
pub children: HashMap<StringId, Vec<NodeId>>,
}
impl Node {
    /// Build the root node: key "root" (interned in `q`), no parent, no values, no children.
    fn new_root(q: &mut Qube) -> Node {
        let key = q.get_or_intern("root");
        Node {
            key,
            metadata: HashMap::new(),
            parent: None,
            values: Vec::new(),
            children: HashMap::new(),
        }
    }

    /// Iterate over the ids of all children, across every key group.
    fn children(&self) -> impl Iterator<Item = &NodeId> {
        self.children.values().flat_map(|group| group.iter())
    }

    /// A node is the root exactly when it has no parent.
    fn is_root(&self) -> bool {
        matches!(self.parent, None)
    }

    /// Children are stored grouped by key, so counting them means visiting every group.
    /// This is a cheap check for the special case of a node with exactly one child.
    /// Returns `Some(NodeId)` of that child, otherwise `None`.
    fn has_exactly_one_child(&self) -> Option<NodeId> {
        let mut groups = self.children.values();
        // Exactly one key group, and that group holds exactly one id.
        match (groups.next(), groups.next()) {
            (Some(group), None) => match group.as_slice() {
                [only] => Some(*only),
                _ => None,
            },
            _ => None,
        }
    }

    /// Total number of children, summed over every key group.
    fn n_children(&self) -> usize {
        let mut total = 0;
        for group in self.children.values() {
            total += group.len();
        }
        total
    }
}
/// A tree of [`Node`]s stored in a flat list, exposed to Python as a class.
#[derive(Debug)]
#[pyclass(subclass, dict)]
pub struct Qube {
/// Id of the root node.
pub root: NodeId,
/// All nodes of the tree; a `NodeId` addresses this list (1-based).
nodes: Vec<Node>,
/// Interner that owns every key and value string referenced by the nodes.
strings: Rodeo,
}
impl Qube {
    /// Create a Qube that contains only the root node, which gets id 1.
    pub fn new() -> Self {
        let mut qube = Self {
            root: NodeId::new(1).unwrap(),
            nodes: Vec::new(),
            strings: Rodeo::default(),
        };
        // The root's key has to be interned through the Qube, so the Qube is built first.
        let root_node = Node::new_root(&mut qube);
        qube.nodes.push(root_node);
        qube
    }

    /// Intern `val` (or look up its existing entry) and return its id.
    fn get_or_intern(&mut self, val: &str) -> StringId {
        let spur = self.strings.get_or_intern(val);
        StringId(spur)
    }

    /// Append a new node with the given `key` and `values` as a child of `parent`
    /// and return the id of the new node.
    pub fn add_node(&mut self, parent: NodeId, key: &str, values: &[&str]) -> NodeId {
        let key_id = self.get_or_intern(key);
        let mut value_ids: Vec<StringId> = Vec::with_capacity(values.len());
        for value in values {
            value_ids.push(self.get_or_intern(value));
        }

        // Store the node; ids are 1-based, so the new id equals the new length.
        self.nodes.push(Node {
            key: key_id,
            metadata: HashMap::new(),
            values: value_ids,
            parent: Some(parent),
            children: HashMap::new(),
        });
        let node_id = NodeId::new(self.nodes.len()).unwrap();

        // Register the new node in the parent's group for this key.
        self[parent]
            .children
            .entry(key_id)
            .or_default()
            .push(node_id);
        node_id
    }

    /// Return the plain-text summary of `node_id`, or of the root when `None` is given.
    fn print(&self, node_id: Option<NodeId>) -> String {
        let target: NodeId = node_id.unwrap_or(self.root);
        self[target].summary(self)
    }
}
#[pymodule]
fn rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<qube::Qube>()?;
m.add_function(wrap_pyfunction!(json::parse_qube, m)?);
m.add_class::<Qube>()?;
Ok(())
}

View File

@ -0,0 +1,105 @@
use crate::{Node, NodeId, Qube};
use pyo3::prelude::*;
use pyo3::types::PyList;
use std::ops::Deref;
use crate::serialisation;
/// A reference to a particular node in a Qube
///
/// Pairs the node's id with a handle to the Python-owned Qube that stores the node.
#[pyclass]
pub struct NodeRef {
/// Id of the referenced node inside `qube`.
id: NodeId,
qube: Py<Qube>, // see https://pyo3.rs/v0.23.1/types for a discussion of Py<T> and Bound<'py, T>
}
#[pymethods]
impl NodeRef {
    /// Recursive representation: `Node(key, child, child, ...)` for this node and all
    /// of its descendants.
    fn __repr__(&self, py: Python) -> PyResult<String> {
        // Attach the stored handle to the current GIL token.
        let bound = self.qube.bind(py);

        fn render<'py>(node_id: NodeId, qube: &Bound<'py, Qube>) -> String {
            let guard = qube.borrow();
            let node = &guard[node_id];
            let mut rendered: Vec<String> = Vec::new();
            for child_id in node.children.values().flatten() {
                rendered.push(render(*child_id, qube));
            }
            format!("Node({}, {})", &guard[node.key], rendered.join(", "))
        }

        Ok(render(self.id, bound))
    }

    /// Short representation: `Node(key)` for this node only.
    fn __str__(&self, py: Python) -> String {
        let guard = self.qube.bind(py).borrow();
        let key_id = guard[self.id].key;
        format!("Node({})", &guard.strings[key_id])
    }

    /// One `NodeRef` per child of this node, each sharing the same Qube handle.
    #[getter]
    pub fn get_children(&self, py: Python) -> Vec<NodeRef> {
        let guard = self.qube.bind(py).borrow();
        let mut refs = Vec::new();
        for child_id in guard[self.id].children.values().flatten() {
            refs.push(NodeRef {
                id: *child_id,
                qube: self.qube.clone_ref(py),
            });
        }
        refs
    }
}
#[pymethods]
impl Qube {
    /// Python constructor: an empty Qube containing only the root node.
    #[new]
    pub fn py_new() -> Self {
        Self::new()
    }

    /// A `NodeRef` pointing at the root node of this Qube.
    #[getter]
    fn get_root(slf: Bound<'_, Self>) -> PyResult<NodeRef> {
        // Read the id first: the borrow must end before the handle is moved out.
        let id = slf.borrow().root;
        Ok(NodeRef {
            id,
            qube: slf.unbind(),
        })
    }

    /// The plain-text tree rendering.
    fn __repr__(&self) -> String {
        self.string_tree()
    }

    /// The plain-text tree rendering.
    fn __str__<'py>(&self) -> String {
        self.string_tree()
    }

    /// The HTML tree rendering.
    fn _repr_html_(&self) -> String {
        self.html_tree()
    }

    /// Exposed to Python as `print`; returns the summary of the root node.
    #[pyo3(name = "print")]
    fn py_print(&self) -> String {
        self.print(None)
    }

    /// The children of the root node, as `NodeRef`s.
    #[getter]
    pub fn get_children(slf: Bound<'_, Self>, py: Python) -> PyResult<Vec<NodeRef>> {
        let id = slf.borrow().root;
        let root = NodeRef {
            id,
            qube: slf.unbind(),
        };
        Ok(root.get_children(py))
    }

    /// Build a Qube from a JSON string by delegating to `serialisation::from_json`.
    #[staticmethod]
    pub fn from_json(data: &str) -> Result<Self, serialisation::JSONError> {
        serialisation::from_json(data)
    }
}

View File

@ -1,205 +0,0 @@
use std::collections::HashMap;
use std::hash::Hash;
use lasso::{Rodeo, Spur};
use pyo3::prelude::*;
use pyo3::types::PyList;
use std::num::NonZero;
use std::ops;
use std::sync::Arc;
// This data structure uses the Newtype Index Pattern
// See https://matklad.github.io/2018/06/04/newtype-index-pattern.html
// See also https://github.com/nrc/r4cppp/blob/master/graphs/README.md#rcrefcellnode for a discussion of other approaches to trees and graphs in rust.
// https://smallcultfollowing.com/babysteps/blog/2015/04/06/modeling-graphs-in-rust-using-vector-indices/
// Index types use struct Id(NonZero<usize>)
// This reserves 0 as a special value which allows Option<Id(NonZero<usize>)> to be the same size as usize.
/// Identifier of a [`Node`] stored inside a [`Qube`].
///
/// The wrapped value is 1-based: indexing a `Qube` with it subtracts one to get
/// the position in the node list.
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
pub(crate) struct NodeId(NonZero<usize>);
// Let a `NodeId` be used directly as an index into a `Qube`.
// Ids start at 1, so one is subtracted to address the node list.
impl ops::Index<NodeId> for Qube {
    type Output = Node;

    /// Borrow the node identified by `node_id`; panics if it is out of range.
    fn index(&self, node_id: NodeId) -> &Node {
        let position = node_id.0.get() - 1;
        &self.nodes[position]
    }
}

impl ops::IndexMut<NodeId> for Qube {
    /// Mutably borrow the node identified by `node_id`; panics if it is out of range.
    fn index_mut(&mut self, node_id: NodeId) -> &mut Node {
        let position = node_id.0.get() - 1;
        &mut self.nodes[position]
    }
}
impl NodeId {
pub fn new_infallible(value: NonZero<usize>) -> NodeId {
NodeId(value)
}
pub fn new(value: usize) -> Option<NodeId> {
NonZero::new(value).map(NodeId)
}
}
/// Identifier of an interned string: a newtype around a `lasso::Spur` key.
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
struct StringId(lasso::Spur);
impl ops::Index<StringId> for lasso::Rodeo {
type Output = str;
fn index(&self, index: StringId) -> &str {
&self[index.0]
}
}
/// A single node of the tree held by a [`Qube`].
#[derive(Debug)]
pub(crate) struct Node {
/// Interned id of this node's key.
key: StringId,
/// Metadata keyed by interned string id; nodes built in this file start with an empty map.
metadata: HashMap<StringId, Vec<String>>,
parent: Option<NodeId>, // If not present, it's the root node
/// Interned ids of the values this node holds for its key.
values: Vec<StringId>,
/// Ids of the child nodes, grouped by the children's key.
children: HashMap<StringId, Vec<NodeId>>,
}
/// Python-visible reference to one node of a Qube.
#[pyclass]
pub struct NodeRef {
/// Id of the referenced node inside `qube`.
id: NodeId,
/// Handle to the Python-owned Qube that stores the node.
qube: Py<Qube>,
}
#[pymethods]
impl NodeRef {
    /// Recursive representation: `Node(key, child, child, ...)`.
    ///
    /// Stops at, and returns, the first error produced by a child's `__repr__`.
    fn __repr__(&self, py: Python) -> PyResult<String> {
        let guard = self.qube.bind(py).borrow();
        let node = &guard[self.id];
        let key = &guard.strings[node.key];

        let mut child_reprs: Vec<String> = Vec::new();
        for child in self.get_children(py).iter() {
            child_reprs.push(child.__repr__(py)?);
        }
        Ok(format!("Node({}, {})", key, child_reprs.join(", ")))
    }

    /// Short representation: `Node(key)` for this node only.
    fn __str__(&self, py: Python) -> String {
        let guard = self.qube.bind(py).borrow();
        let key_id = guard[self.id].key;
        format!("Node({})", &guard.strings[key_id])
    }

    /// One `NodeRef` per child of this node, each sharing the same Qube handle.
    #[getter]
    pub fn get_children(&self, py: Python) -> Vec<NodeRef> {
        let guard = self.qube.bind(py).borrow();
        let mut refs = Vec::new();
        for child_id in guard[self.id].children.values().flatten() {
            refs.push(NodeRef {
                id: *child_id,
                qube: self.qube.clone_ref(py),
            });
        }
        refs
    }
}
impl Node {
    /// Build the root node: key "root" (interned in `q`), no parent, no values, no children.
    fn new_root(q: &mut Qube) -> Node {
        let key = q.get_or_intern("root");
        Node {
            key,
            metadata: HashMap::new(),
            parent: None,
            values: Vec::new(),
            children: HashMap::new(),
        }
    }

    /// Iterate over the ids of all children, across every key group.
    fn children(&self) -> impl Iterator<Item = &NodeId> {
        self.children.values().flat_map(|group| group.iter())
    }
}
/// A tree of [`Node`]s stored in a flat list, exposed to Python as a class.
#[derive(Debug)]
#[pyclass]
pub struct Qube {
/// Id of the root node.
pub root: NodeId,
/// All nodes of the tree; a `NodeId` addresses this list (1-based).
nodes: Vec<Node>,
/// Interner that owns every key and value string referenced by the nodes.
strings: Rodeo,
}
impl Qube {
    /// Intern `val` (or look up its existing entry) and return its id.
    fn get_or_intern(&mut self, val: &str) -> StringId {
        let spur = self.strings.get_or_intern(val);
        StringId(spur)
    }

    /// Append a new node with the given `key` and `values` as a child of `parent`
    /// and return the id of the new node.
    pub fn add_node(&mut self, parent: NodeId, key: &str, values: &[&str]) -> NodeId {
        let key_id = self.get_or_intern(key);
        let mut value_ids: Vec<StringId> = Vec::with_capacity(values.len());
        for value in values {
            value_ids.push(self.get_or_intern(value));
        }

        // Store the node; ids are 1-based, so the new id equals the new length.
        self.nodes.push(Node {
            key: key_id,
            metadata: HashMap::new(),
            values: value_ids,
            parent: Some(parent),
            children: HashMap::new(),
        });
        let node_id = NodeId::new(self.nodes.len()).unwrap();

        // Register the new node in the parent's group for this key.
        self[parent]
            .children
            .entry(key_id)
            .or_default()
            .push(node_id);
        node_id
    }
}
#[pymethods]
impl Qube {
#[new]
pub fn new() -> Self {
let mut q = Qube {
root: NodeId::new(1).unwrap(),
nodes: Vec::new(),
strings: Rodeo::default(),
};
let root = Node::new_root(&mut q);
q.nodes.push(root);
q
}
#[getter]
fn get_root<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult<NodeRef> {
Ok(NodeRef {
id: slf.root,
qube: slf.into(),
})
}
fn __repr__(&self) -> String {
format!("{:?}", &self)
}
fn __str__<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> String {
format!("Qube()")
}
#[getter]
pub fn get_children<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult<Vec<NodeRef>> {
let root = NodeRef {
id: slf.root,
// `into_py` clones the existing Python handle; no new Qube object is allocated.
qube: slf.into(),
};
Ok(root.get_children(py))
}
}

View File

@ -1,9 +1,27 @@
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use serde::{Deserialize, Serialize};
use serde_json::{Result, Value};
use serde_json::Value;
use std::collections::HashMap;
use crate::qube::{Node, NodeId, Qube};
use crate::{Node, NodeId, Qube};
// Use a newtype wrapper to allow us to implement auto conversion from serde_json::Error to PyErr
// via a wrapper intermediate
// see https://pyo3.rs/main/function/error-handling.html#foreign-rust-error-types
/// Error produced while reading a Qube from JSON; wraps the underlying `serde_json::Error`.
pub struct JSONError(serde_json::Error);
impl From<JSONError> for PyErr {
fn from(error: JSONError) -> Self {
PyValueError::new_err(format!("{}", error.0))
}
}
impl From<serde_json::Error> for JSONError {
fn from(other: serde_json::Error) -> Self {
Self(other)
}
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(untagged)]
@ -37,10 +55,7 @@ fn add_nodes(qube: &mut Qube, parent: NodeId, nodes: &[JSONQube]) -> Vec<NodeId>
.collect()
}
#[pyfunction]
pub fn parse_qube() -> PyResult<Qube> {
let data = r#"{"key": "root", "values": ["root"], "metadata": {}, "children": [{"key": "frequency", "values": "*", "metadata": {}, "children": [{"key": "levtype", "values": "*", "metadata": {}, "children": [{"key": "param", "values": "*", "metadata": {}, "children": [{"key": "levelist", "values": "*", "metadata": {}, "children": [{"key": "domain", "values": ["a", "b", "c", "d"], "metadata": {}, "children": []}]}]}]}]}]}"#;
pub fn from_json(data: &str) -> Result<Qube, JSONError> {
// Parse the string of data into serde_json::Value.
let json_qube: JSONQube = serde_json::from_str(data).expect("JSON parsing failed");

View File

@ -0,0 +1,2 @@
mod json;
pub use json::{from_json, JSONError};

View File

@ -1,13 +1,21 @@
from qubed.rust import Qube, parse_qube
from __future__ import annotations
q = Qube()
print(q)
import json
print(f"repr: {q.root!r} str: {q.root}")
from qubed import Qube as pyQube
from qubed.rust import Qube as Qube
q = parse_qube()
print(repr(q))
q = pyQube.from_tree("""
root, class=d1
dataset=another-value, generation=1/2/3
dataset=climate-dt/weather-dt, generation=1/2/3/4
""")
json_str = json.dumps(q.to_json())
rust_qube = Qube.from_json(json_str)
print(repr(rust_qube))
r = q.root
print(f"{q.root = }, {q.children = }")
expected = """root, class=d1
dataset=another-value, generation=1/2/3
dataset=climate-dt/weather-dt, generation=1/2/3/4"""
assert repr(rust_qube) == expected
# print(rs_qube._repr_html_())

View File

@ -0,0 +1,21 @@
from __future__ import annotations
import json
from qubed import Qube as pyQube
from qubed.rust import Qube as Qube
q = pyQube.from_tree("""
root, class=d1
dataset=another-value, generation=1/2/3
dataset=climate-dt/weather-dt, generation=1/2/3/4
""")
json_str = json.dumps(q.to_json())
rust_qube = Qube.from_json(json_str)
print(repr(rust_qube))
expected = """root, class=d1
dataset=another-value, generation=1/2/3
dataset=climate-dt/weather-dt, generation=1/2/3/4
"""
assert repr(rust_qube) == expected