qubed/src/rust/lib.rs
2025-05-29 17:09:17 +02:00

236 lines
6.3 KiB
Rust

#![allow(unused_imports)]
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use pyo3::types::{PyDict, PyInt, PyList, PyString};
use python_interface::QubeError;
use std::collections::HashMap;
use std::iter;
use pyo3::prelude::*;
use std::hash::Hash;
use std::rc::Rc;
use lasso::{Rodeo, Spur};
use std::num::NonZero;
use std::ops;
mod serialisation;
mod python_interface;
mod formatters;
mod set_operations;
// This data structure uses the Newtype Index Pattern
// See https://matklad.github.io/2018/06/04/newtype-index-pattern.html
// See also https://github.com/nrc/r4cppp/blob/master/graphs/README.md#rcrefcellnode for a discussion of other approaches to trees and graphs in rust.
// https://smallcultfollowing.com/babysteps/blog/2015/04/06/modeling-graphs-in-rust-using-vector-indices/
// Index types use struct Id(NonZero<usize>)
// This reserves 0 as a special value which allows Option<Id(NonZero<usize>)> to be the same size as usize.
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
pub(crate) struct NodeId(NonZero<usize>);
// Allow node indices to index directly into Qubes:
impl ops::Index<NodeId> for Qube {
type Output = Node;
fn index(&self, index: NodeId) -> &Node {
&self.nodes[index.0.get() - 1]
}
}
impl ops::IndexMut<NodeId> for Qube {
fn index_mut(&mut self, index: NodeId) -> &mut Node {
&mut self.nodes[index.0.get() - 1]
}
}
impl ops::Index<StringId> for Qube {
type Output = str;
fn index(&self, index: StringId) -> &str {
&self.strings[index]
}
}
impl NodeId {
pub fn new(value: usize) -> Option<NodeId> {
NonZero::new(value).map(NodeId)
}
}
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
struct StringId(lasso::Spur);
impl ops::Index<StringId> for lasso::Rodeo {
type Output = str;
fn index(&self, index: StringId) -> &str {
&self[index.0]
}
}
#[derive(Debug, Clone)]
pub(crate) struct Node {
pub key: StringId,
pub metadata: HashMap<StringId, Vec<String>>,
pub parent: Option<NodeId>, // If not present, it's the root node
pub values: Vec<StringId>,
pub children: HashMap<StringId, Vec<NodeId>>,
}
impl Node {
fn new_root(q: &mut Qube) -> Node {
Node {
key: q.get_or_intern("root"),
metadata: HashMap::new(),
parent: None,
values: vec![],
children: HashMap::new(),
}
}
fn children(&self) -> impl Iterator<Item = &NodeId> {
self.children.values().flatten()
}
fn is_root(&self) -> bool {
self.parent.is_none()
}
/// Because children are stored grouped by key
/// determining the number of children quickly takes a little effort.
/// This is a fast method for the special case of checking if a Node has exactly one child.
/// Returns Ok(NodeId) if there is one child else None
fn has_exactly_one_child(&self) -> Option<NodeId> {
if self.children.len() != 1 {return None}
let Some(value_group) = self.children.values().next() else {return None};
let [node_id] = &value_group.as_slice() else {return None};
Some(*node_id)
}
fn n_children(&self) -> usize {
self.children
.values()
.map(|v| v.len())
.sum()
}
fn keys<'a>(&'a self, q: &'a Qube) -> impl Iterator<Item = &'a str> {
self.children.keys()
.map(|s| {&q[*s]})
}
}
#[derive(Debug, Clone)]
#[pyclass(subclass, dict)]
pub struct Qube {
pub root: NodeId,
nodes: Vec<Node>,
strings: Rodeo,
}
impl Qube {
pub fn new() -> Self {
let mut q = Self {
root: NodeId::new(1).unwrap(),
nodes: Vec::new(),
strings: Rodeo::default(),
};
let root = Node::new_root(&mut q);
q.nodes.push(root);
q
}
fn get_or_intern(&mut self, val: &str) -> StringId {
StringId(self.strings.get_or_intern(val))
}
pub(crate) fn add_node(&mut self, parent: NodeId, key: &str, values: &[&str]) -> NodeId {
let key_id = self.get_or_intern(key);
let values = values.iter().map(|val| self.get_or_intern(val)).collect();
// Create the node object
let node = Node {
key: key_id,
metadata: HashMap::new(),
values: values,
parent: Some(parent),
children: HashMap::new(),
};
// Insert it into the Qube arena and determine its id
self.nodes.push(node);
let node_id = NodeId::new(self.nodes.len()).unwrap();
// Add a reference to this node's id to the parents list of children.
let parent_node = &mut self[parent];
let key_group = parent_node.children.entry(key_id).or_insert(Vec::new());
key_group.push(node_id);
node_id
}
fn print(&self, node_id: Option<NodeId>) -> String {
let node_id: NodeId = node_id.unwrap_or(self.root);
let node = &self[node_id];
node.summary(&self)
}
fn get_node_ref(&self, id: NodeId) -> NodeRef {
let node = &self[id];
NodeRef { id: id, node: &node, qube: &self }
}
pub fn get_string_id(&self, s: &str) -> Option<StringId> {
self.strings.get(s)
.map(|id| StringId(id))
}
}
#[pymodule]
fn rust(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<Qube>()?;
m.add("QubeError", py.get_type::<python_interface::QubeError>())?;
Ok(())
}
pub struct NodeRef<'a> {
pub id: NodeId,
pub node: &'a Node,
pub qube: &'a Qube,
}
impl<'a> NodeRef<'a> {
pub fn keys(&self) -> impl Iterator<Item = &str> {
self.node.keys(self.qube)
}
fn flat_children(&'a self) -> impl Iterator<Item = Self> {
self.node.children
.values()
.flatten()
.map(|id| {
NodeRef { id: *id, node: &self.qube[*id], qube: self.qube }
})
}
fn children_by_key(&'a self, key: &str) -> impl Iterator<Item = Self> {
let id = self.qube.get_string_id(key);
let children = id
.map(|i| self.node.children.get(&i))
.flatten();
children.map(
|ids| ids.into_iter().map(
|id| {
NodeRef { id: *id, node: &self.qube[*id], qube: self.qube }
})).into_iter().flatten()
}
}