A bit more on the rust backend

This commit is contained in:
Tom 2025-05-29 17:09:17 +02:00
parent 3328a0375b
commit aaafa28dfb
9 changed files with 363 additions and 73 deletions

View File

@ -61,7 +61,6 @@ class QubeNamedRoot:
"Helper class to print a custom root name"
key: str
dtype: str = "str"
children: tuple[Qube, ...] = ()
def summary(self) -> str:
@ -388,6 +387,7 @@ class Qube:
for c in node.children:
yield from to_list_of_cubes(c)
else:
if not node.children:
yield {node.key: list(node.values)}

View File

@ -65,6 +65,20 @@ class ValueGroup(ABC):
T = TypeVar("T")
EnumValuesType = FrozenSet[T]
# Lookup from a dtype name string to the Python type that represents it.
# NOTE(review): `datetime` is presumably the `datetime.datetime` class (the
# table below calls `datetime.fromisoformat`) — confirm against the imports.
_dtype_map: dict[str, type] = {
"str": str,
"int64": int,
"float64": float,
"date": datetime,
}
# Reverse lookup (Python type -> dtype name), built by inverting _dtype_map.
_dtype_map_inv: dict[type, str] = {v: k for k, v in _dtype_map.items()}
# One callable per dtype name. For "date" this is `datetime.fromisoformat`
# rather than the type itself.
# NOTE(review): presumably meant for converting raw values into the dtype;
# the only visible reference is in commented-out code — confirm intended use.
_dtype_formatters = {
"str": str,
"int64": int,
"float64": float,
"date": datetime.fromisoformat,
}
@dataclass(frozen=True, order=True)
class QEnum(ValueGroup):
@ -76,10 +90,12 @@ class QEnum(ValueGroup):
values: EnumValuesType
_dtype: str = "str"
def __init__(self, obj):
def __init__(self, obj, dtype="str"):
object.__setattr__(self, "values", tuple(sorted(obj)))
object.__setattr__(
self, "dtype", type(self.values[0]) if len(self.values) > 0 else "str"
self,
"_dtype",
dtype,
)
def __post_init__(self):
@ -108,7 +124,18 @@ class QEnum(ValueGroup):
return min(self.values)
def to_json(self):
return list(self.values)
return {"type": "enum", "dtype": self.dtype(), "values": self.values}
# @classmethod
# def from_json(cls, type: Literal["enum"], dtype: str, values: list):
# dtype_formatter = _dtype_formatters[dtype]
@classmethod
def from_list(cls, obj):
    """Build an instance from a list of same-typed values, inferring the dtype.

    The dtype name is looked up in ``_dtype_map_inv`` using the type of the
    first element. An empty list carries no type information, so it falls
    back to the ``"str"`` dtype.

    Raises:
        AssertionError: if the elements do not all share the same type.
        KeyError: if the element type has no entry in ``_dtype_map_inv``.
    """
    if len(obj) == 0:
        # Nothing to infer from; indexing obj[0] would raise IndexError.
        return cls(obj, dtype="str")

    dtype = type(obj[0])
    # all() is required here: asserting a bare list only tests that the list
    # is non-empty, so it could never fail and mixed types slipped through.
    assert all(type(v) is dtype for v in obj), (
        "All values passed to from_list must have the same type"
    )
    return cls(obj, dtype=_dtype_map_inv[dtype])
@dataclass(frozen=True, order=True)
@ -389,17 +416,13 @@ class IntRange(Range):
return ranges
def values_from_json(obj) -> ValueGroup:
def values_from_json(obj: dict | list) -> ValueGroup:
if isinstance(obj, list):
return QEnum(tuple(obj))
return QEnum.from_list(obj)
match obj["dtype"]:
case "date":
return DateRange(**obj)
case "time":
return TimeRange(**obj)
case "int":
return IntRange(**obj)
match obj["type"]:
case "enum":
QEnum.from_json(**obj)
case _:
raise ValueError(f"Unknown dtype {obj['dtype']}")

View File

@ -3,10 +3,12 @@
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use pyo3::types::{PyDict, PyInt, PyList, PyString};
use python_interface::QubeError;
use std::collections::HashMap;
use std::iter;
use pyo3::prelude::*;
use std::hash::Hash;
use std::rc::Rc;
use lasso::{Rodeo, Spur};
use std::num::NonZero;
@ -15,6 +17,7 @@ use std::ops;
mod serialisation;
mod python_interface;
mod formatters;
mod set_operations;
// This data structure uses the Newtype Index Pattern
// See https://matklad.github.io/2018/06/04/newtype-index-pattern.html
@ -51,9 +54,6 @@ impl ops::Index<StringId> for Qube {
}
impl NodeId {
pub fn new_infallible(value: NonZero<usize>) -> NodeId {
NodeId(value)
}
pub fn new(value: usize) -> Option<NodeId> {
NonZero::new(value).map(NodeId)
}
@ -70,7 +70,7 @@ impl ops::Index<StringId> for lasso::Rodeo {
}
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub(crate) struct Node {
pub key: StringId,
pub metadata: HashMap<StringId, Vec<String>>,
@ -115,9 +115,14 @@ impl Node {
.map(|v| v.len())
.sum()
}
/// Iterate over the keys of this node's `children` map, resolved to string
/// slices by indexing `q` with each key id.
fn keys<'a>(&'a self, q: &'a Qube) -> impl Iterator<Item = &'a str> {
    self.children
        .keys()
        .copied()
        .map(move |key_id| &q[key_id])
}
}
#[derive(Debug)]
#[derive(Debug, Clone)]
#[pyclass(subclass, dict)]
pub struct Qube {
pub root: NodeId,
@ -142,7 +147,7 @@ impl Qube {
StringId(self.strings.get_or_intern(val))
}
pub fn add_node(&mut self, parent: NodeId, key: &str, values: &[&str]) -> NodeId {
pub(crate) fn add_node(&mut self, parent: NodeId, key: &str, values: &[&str]) -> NodeId {
let key_id = self.get_or_intern(key);
let values = values.iter().map(|val| self.get_or_intern(val)).collect();
@ -172,11 +177,59 @@ impl Qube {
let node = &self[node_id];
node.summary(&self)
}
/// Bundle `id` together with a borrow of the node it indexes and a borrow of
/// this qube.
fn get_node_ref(&self, id: NodeId) -> NodeRef {
    NodeRef {
        id,
        node: &self[id],
        qube: self,
    }
}
/// Look up the id of `s` in the string table without interning it.
///
/// Returns `None` when the lookup in `self.strings` finds nothing.
pub fn get_string_id(&self, s: &str) -> Option<StringId> {
    let spur = self.strings.get(s)?;
    Some(StringId(spur))
}
}
#[pymodule]
fn rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
fn rust(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<Qube>()?;
m.add("QubeError", py.get_type::<python_interface::QubeError>())?;
Ok(())
}
/// A borrowed handle to a single node: its id, the node itself, and the
/// qube used to resolve ids for it.
pub struct NodeRef<'a> {
/// Identifier of the node.
pub id: NodeId,
/// The node data this handle points at.
pub node: &'a Node,
/// The qube used to resolve node and string ids for this node.
pub qube: &'a Qube,
}
impl<'a> NodeRef<'a> {
    /// Iterate over the keys under which this node has children, as strings.
    pub fn keys(&self) -> impl Iterator<Item = &str> {
        self.node.keys(self.qube)
    }

    /// Iterate over every child of this node, across all keys.
    fn flat_children(&'a self) -> impl Iterator<Item = Self> {
        let qube = self.qube;
        self.node
            .children
            .values()
            .flatten()
            .map(move |&id| NodeRef { id, node: &qube[id], qube })
    }

    /// Iterate over the children stored under `key`.
    ///
    /// Yields nothing when `key` has no string id in the qube or when this
    /// node has no children under it.
    fn children_by_key(&'a self, key: &str) -> impl Iterator<Item = Self> {
        let qube = self.qube;
        qube.get_string_id(key)
            .and_then(|key_id| self.node.children.get(&key_id))
            .into_iter()
            .flatten()
            .map(move |&id| NodeRef { id, node: &qube[id], qube })
    }
}

View File

@ -1,19 +1,34 @@
use crate::{Node, NodeId, Qube};
use crate::{Node, NodeId, Qube, NodeRef};
use pyo3::prelude::*;
use pyo3::types::PyList;
use pyo3::types::{PyList, PyType};
use core::borrow;
use std::ops::Deref;
use std::cell::Ref;
use crate::set_operations;
use crate::serialisation;
use itertools::Itertools;
use pyo3::create_exception;
create_exception!(qubed, QubeError, pyo3::exceptions::PyException);
/// A reference to a particular node in a Qube
#[pyclass]
pub struct NodeRef {
pub struct PyNodeRef {
id: NodeId,
qube: Py<Qube>, // see https://pyo3.rs/v0.23.1/types for a discussion of Py<T> and Bound<'py, T>
}
fn into_py_node_ref(node_ref: NodeRef, qube: Py<Qube>) -> PyNodeRef {
PyNodeRef {
id: node_ref.id,
qube: qube,
}
}
#[pymethods]
impl NodeRef {
impl PyNodeRef {
fn __repr__(&self, py: Python) -> PyResult<String> {
// Get the Py<Qube> reference, bind it to the GIL.
let qube = self.qube.bind(py);
@ -43,13 +58,13 @@ impl NodeRef {
}
#[getter]
pub fn get_children(&self, py: Python) -> Vec<NodeRef> {
pub fn get_children(&self, py: Python) -> Vec<Self> {
let qube = self.qube.bind(py).borrow();
let node = &qube[self.id];
node.children
.values()
.flatten()
.map(|child_id| NodeRef {
.map(|child_id| Self {
id: *child_id,
qube: self.qube.clone_ref(py),
})
@ -57,6 +72,21 @@ impl NodeRef {
}
}
#[derive(FromPyObject)]
/// Either a single value or a list of values.
///
/// Lets a parameter accept `T` or `Vec<T>`; convert to a plain `Vec<T>` with
/// `Vec::from(x)` or `x.into()`.
pub enum OneOrMany<T> {
    /// A single bare value.
    One(T),
    /// A list of values.
    Many(Vec<T>),
}

// Implemented as `From` rather than `Into`: the standard blanket impl then
// provides `Into<Vec<T>>` for free, so existing `.into()` callers keep working
// and `Vec::from(..)` becomes available as well.
impl<T> From<OneOrMany<T>> for Vec<T> {
    fn from(value: OneOrMany<T>) -> Self {
        match value {
            OneOrMany::One(item) => vec![item],
            OneOrMany::Many(items) => items,
        }
    }
}
#[pymethods]
impl Qube {
#[new]
@ -64,16 +94,56 @@ impl Qube {
Qube::new()
}
/// Add a node with the given `key` and `values` beneath `parent` and return a
/// reference to the new node.
///
/// `values` may be a single string or a list of strings.
///
/// Raises `QubeError` if `parent` refers to a different qube.
#[pyo3(name = "add_node")]
pub fn py_add_node(
    slf: Bound<'_, Self>,
    parent: PyRef<'_, PyNodeRef>,
    key: &str,
    values: OneOrMany<String>,
) -> PyResult<PyNodeRef> {
    // The parent must live in this qube; its id means nothing in another one.
    let parent_owner = parent.qube.bind(slf.py());
    if !parent_owner.is(&slf) {
        return Err(QubeError::new_err("Supplied parent node is not in the target qube."));
    }

    // Normalise `T | Vec<T>` to a Vec, then borrow as &str for the core API.
    let owned_values: Vec<String> = values.into();
    let borrowed_values: Vec<&str> = owned_values.iter().map(|v| v.as_str()).collect();

    let id = slf.borrow_mut().add_node(parent.id, key, &borrowed_values);
    Ok(PyNodeRef { id, qube: slf.unbind() })
}
pub fn set_root(
slf: Bound<'_, Self>,
node: PyRef<'_, PyNodeRef>,
) -> () {
let mut q = slf.borrow_mut();
q.root = node.id;
}
#[getter]
fn get_root(slf: Bound<'_, Self>) -> PyResult<NodeRef> {
Ok(NodeRef {
fn get_root(slf: Bound<'_, Self>) -> PyResult<PyNodeRef> {
Ok(PyNodeRef {
id: slf.borrow().root,
qube: slf.unbind(),
})
}
fn __repr__(&self) -> String {
self.string_tree()
// format!("{:?}", self)
let nodes_str: String = self.nodes.iter()
.enumerate()
.map(|(id, node)| {
format!("{{id: {}, key: {}, values: [{}], children: [{}]}}",
id+1,
&self[node.key],
node.values.iter().map(|s| &self[*s]).join(", "),
node.children().map(|n| n.0).join(", "),
)
}).join(", ");
format!("Qube {{root: {}, nodes: {}}}", self.root.0, nodes_str)
}
fn __str__<'py>(&self) -> String {
@ -90,8 +160,8 @@ impl Qube {
}
#[getter]
pub fn get_children(slf: Bound<'_, Self>, py: Python) -> PyResult<Vec<NodeRef>> {
let root = NodeRef {
pub fn get_children(slf: Bound<'_, Self>, py: Python) -> PyResult<Vec<PyNodeRef>> {
let root = PyNodeRef {
id: slf.borrow().root,
qube: slf.unbind(),
};
@ -102,4 +172,8 @@ impl Qube {
pub fn from_json(data: &str) -> Result<Self, serialisation::JSONError> {
serialisation::from_json(data)
}
/// Implements `self | other` by delegating to
/// `set_operations::set_operation` with `Op::Union`.
pub fn __or__(slf: Bound<'_, Self>, other: Bound<'_, Qube>) -> Qube {
    let lhs = slf.borrow();
    let rhs = other.borrow();
    set_operations::set_operation(&lhs, &rhs, set_operations::Op::Union)
}
}

View File

@ -24,10 +24,23 @@ impl From<serde_json::Error> for JSONError {
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(untagged)]
// Lower-case the variant name used as the `dtype` tag (`"int64"`), matching
// the convention of the sibling `Enum` type, which is tagged `"str"`.
#[serde(tag = "dtype", rename_all = "lowercase")]
/// Range-valued node values, tagged by `dtype`.
enum Ranges {
    /// Integer ranges, each stored as a pair of `i64`.
    /// NOTE(review): presumably `(start, end)` — confirm bounds semantics.
    Int64 { values: Vec<(i64, i64)> },
}
/// Enumerated node values, internally tagged by a `dtype` field whose value
/// is the lower-cased variant name.
#[derive(Serialize, Deserialize, Debug)]
#[serde(tag = "dtype", rename_all = "lowercase")]
enum Enum {
/// A list of string values (`"dtype": "str"`).
Str{values: Vec<String>}
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(tag = "type", rename_all = "lowercase")]
enum Values {
Wildcard(String),
Enum(Vec<String>),
Wildcard{},
Enum(Enum),
Range(Ranges)
}
#[derive(Serialize, Deserialize, Debug)]
@ -43,8 +56,9 @@ fn add_nodes(qube: &mut Qube, parent: NodeId, nodes: &[JSONQube]) -> Vec<NodeId>
.iter()
.map(|json_node| {
let values = match &json_node.values {
Values::Wildcard(_) => &vec!["*"],
Values::Enum(strings) => &strings.iter().map(|s| s.as_str()).collect(),
Values::Wildcard{} => &vec!["*"],
Values::Enum(Enum::Str{values}) => &values.iter().map(|s| s.as_str()).collect(),
Values::Range(_) => todo!(),
};
let node_id = qube.add_node(parent, &json_node.key, values);

View File

@ -0,0 +1,40 @@
use crate::NodeRef;
use crate::{Node, NodeId, Qube};
use itertools::chain;
use std::collections::HashSet;
/// The binary set operations that can be applied to a pair of qubes.
///
/// A plain field-less enum, so it is cheap to copy, compare, hash and print.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Op {
    /// Everything in either operand.
    Union,
    /// Only what both operands share.
    Intersection,
    /// What is in the first operand but not the second.
    Difference,
    /// What is in exactly one of the operands.
    SymmetricDifference,
}
fn op_to_venn_diagram(op: Op) -> (bool, bool, bool) {
use Op::*;
match op {
Union => (true, true, true),
Intersection => (false, true, false),
Difference => (true, false, false),
SymmetricDifference => (true, false, true),
}
}
/// Entry point for applying the set operation `op` to qubes `a` and `b`,
/// producing a new `Qube`.
///
/// # Panics
///
/// Not implemented yet: the body is `todo!()`, so every call panics.
pub fn set_operation<'a>(a: &'a Qube, b: &'a Qube, op: Op) -> Qube {
todo!()
// NOTE(review): the draft call below passes `a.root_ref()` for both
// operands; the second was presumably meant to be `b.root_ref()` — confirm
// before enabling it.
// _set_operation(a.root_ref(), a.root_ref(), op)
}
// fn _set_operation<'a>(a: NodeRef, b: NodeRef, op: Op) -> Qube {
// let keys: HashSet<&str> = HashSet::from_iter(chain(a.keys(), b.keys()));
// for key in keys {
// let a = a.children_by_key(key)
// }
// todo!()
// }
/// In-place counterpart of the set operation on `a` and `b`.
///
/// Currently a stub: it returns `a` unchanged, and `b` and `op` are not used.
pub fn set_operation_inplace<'a>(a: &'a mut Qube, b: &'a Qube, op: Op) -> &'a Qube {
a
}

View File

@ -1,21 +1,81 @@
from __future__ import annotations
import json
from datetime import datetime
from typing import Sequence
from qubed import Qube as pyQube
from qubed.rust import Qube as Qube
from qubed.rust import Qube as rsQube
q = pyQube.from_tree("""
root, class=d1
dataset=another-value, generation=1/2/3
dataset=climate-dt/weather-dt, generation=1/2/3/4
""")
json_str = json.dumps(q.to_json())
rust_qube = Qube.from_json(json_str)
print(repr(rust_qube))
# q = pyQube.from_tree("""
# root, class=d1
# ├── dataset=another-value, generation=1/2/3
# └── dataset=climate-dt/weather-dt, generation=1/2/3/4
# """)
# json_str = json.dumps(q.to_json())
# rust_qube = Qube.from_json(json_str)
# # print(repr(rust_qube))
expected = """root, class=d1
dataset=another-value, generation=1/2/3
dataset=climate-dt/weather-dt, generation=1/2/3/4"""
assert repr(rust_qube) == expected
# print(rs_qube._repr_html_())
# # print(json_str)
# expected = """root, class=d1
# ├── dataset=another-value, generation=1/2/3
# └── dataset=climate-dt/weather-dt, generation=1/2/3/4
# """
# assert repr(rust_qube) == expected
# # print(rs_qube._repr_html_())
# print(q | q)
value = str | int | float | datetime
class Qube(rsQube):
    """Python-side convenience constructors layered on the Rust-backed Qube."""

    @classmethod
    def empty(cls):
        """Construct a new qube via ``cls()`` and print a debug trace of the call."""
        q = cls()
        print(f"empty called {cls = } {q = }")
        return q

    @classmethod
    def from_datacube(cls, datacube: dict[str, value | Sequence[value]]) -> Qube:
        """Build a single-branch qube from a mapping of key -> value(s).

        Keys are nested in the mapping's iteration order: the first key hangs
        off the root and each later key becomes the child of the previous one.
        An empty mapping yields an empty qube.
        """
        qube = cls.empty()
        # Start at the root and descend one level per key. Looping over all
        # items (instead of unpacking the first one) also handles an empty
        # mapping, which previously raised ValueError.
        node = qube.root
        for key, values in datacube.items():
            node = qube.add_node(parent=node, key=key, values=values)
        return qube

    @classmethod
    def from_dict(cls, d: dict) -> Qube:
        """Build a qube from nested dicts whose keys look like ``"key=v1/v2"``.

        Each dict key is split into a node key and its ``/``-separated values;
        the dict stored under it describes that node's children.

        Raises:
            ValueError: if a dict key contains no ``=``.
        """
        q = cls.empty()

        def add_children(parent, subtree: dict):
            for k, children in subtree.items():
                # Split on the first "=" only, so values may contain "=".
                key, values = k.split("=", 1)
                node = q.add_node(
                    parent=parent,
                    key=key,
                    values=values.split("/"),
                )
                add_children(parent=node, subtree=children)

        add_children(q.root, d)
        return q
q = Qube.from_datacube({"a": ["4"], "b": "test", "c": ["1", "2", "3"]})
print(q)
print(repr(q))
q = Qube.from_dict(
{
"a=2/3": {"b=1": {}},
"a2=a/b": {"b2=1/2": {}},
}
)
print(q)
print(repr(q))

View File

@ -16,3 +16,29 @@ def test_iter_leaves_simple():
]
assert set(make_hashable(q.leaves())) == set(make_hashable(entries))
def test_datacubes():
q = Qube.from_tree("""
root, class=d1
date=19920101/19930101/19940101, params=1/2/3
date=19950101
level=1/2/3, params=1/2/3/4
params=1/2/3/4
""")
assert len(list(q.datacubes())) == 3
assert list(q.datacubes()) == [
{
"class": ["d1"],
"date": ["19920101", "19930101", "19940101"],
"params": ["1", "2", "3"],
},
{
"class": ["d1"],
"date": ["19950101"],
"level": ["1", "2", "3"],
"params": ["1", "2", "3", "4"],
},
{"class": ["d1"], "date": ["19950101"], "params": ["1", "2", "3", "4"]},
]

View File

@ -1,21 +1,21 @@
from __future__ import annotations
import json
from qubed import Qube as pyQube
from qubed.rust import Qube as Qube
q = pyQube.from_tree("""
root, class=d1
dataset=another-value, generation=1/2/3
dataset=climate-dt/weather-dt, generation=1/2/3/4
""")
json_str = json.dumps(q.to_json())
rust_qube = Qube.from_json(json_str)
print(repr(rust_qube))
expected = """root, class=d1
dataset=another-value, generation=1/2/3
dataset=climate-dt/weather-dt, generation=1/2/3/4
"""
assert repr(rust_qube) == expected
# def test_from_json():
# q = pyQube.from_tree("""
# root, class=d1
# ├── dataset=another-value, generation=1/2/3
# └── dataset=climate-dt/weather-dt, generation=1/2/3/4
# """)
# json_str = json.dumps(q.to_json())
# rust_qube = Qube.from_json(json_str)
# print(repr(rust_qube))
# expected = """root, class=d1
# ├── dataset=another-value, generation=1/2/3
# └── dataset=climate-dt/weather-dt, generation=1/2/3/4
# """
# assert repr(rust_qube) == expected