From 9d4fcbe62486c06a22ab14021b1580f8d72abe39 Mon Sep 17 00:00:00 2001 From: Tom Date: Tue, 18 Feb 2025 07:15:22 +0000 Subject: [PATCH] Set operations done --- src/python/qubed/Qube.py | 62 +++++++++------------- src/python/qubed/set_operations.py | 82 +++++++++++++++++++++++++----- src/python/qubed/value_types.py | 6 +++ tests/test_basic_operations.py | 46 +++++++++++++---- tests/test_compression.py | 29 +++++++++++ tests/test_smoke.py | 17 ++++++- 6 files changed, 181 insertions(+), 61 deletions(-) create mode 100644 tests/test_compression.py diff --git a/src/python/qubed/Qube.py b/src/python/qubed/Qube.py index be0d951..118d7bc 100644 --- a/src/python/qubed/Qube.py +++ b/src/python/qubed/Qube.py @@ -38,7 +38,9 @@ class Qube: return cls( data = NodeData(key, values, metadata = kwargs.get("metadata", frozendict()) ), - children = tuple(sorted(children)), + children = tuple(sorted(children, + key = lambda n : ((n.key, n.values.min())) + )), ) @@ -49,18 +51,19 @@ class Qube: key=json["key"], values=values_from_json(json["values"]), metadata=json["metadata"] if "metadata" in json else {}, - children=tuple(from_json(c) for c in json["children"]) + children=(from_json(c) for c in json["children"]), ) return from_json(json) @classmethod def from_dict(cls, d: dict) -> 'Qube': - def from_dict(d: dict) -> tuple[Qube, ...]: - return tuple(Qube.make( - key=k.split("=")[0], - values=QEnum((k.split("=")[1].split("/"))), - children=from_dict(children) - ) for k, children in d.items()) + def from_dict(d: dict) -> list[Qube]: + return [ + Qube.make( + key=k.split("=")[0], + values=QEnum((k.split("=")[1].split("/"))), + children=from_dict(children) + ) for k, children in d.items()] return Qube.make(key = "root", values=QEnum(("root",)), @@ -86,6 +89,15 @@ class Qube: def __or__(self, other: "Qube") -> "Qube": return set_operations.operation(self, other, set_operations.SetOperation.UNION) + + def __and__(self, other: "Qube") -> "Qube": + return set_operations.operation(self, other, set_operations.SetOperation.INTERSECTION) + + def __sub__(self, other: "Qube") -> "Qube": + return set_operations.operation(self, other, set_operations.SetOperation.DIFFERENCE) + + def __xor__(self, other: "Qube") -> "Qube": + return set_operations.operation(self, other, set_operations.SetOperation.SYMMETRIC_DIFFERENCE) def __getitem__(self, args) -> 'Qube': @@ -264,39 +276,13 @@ class Qube: return hash_node(self) def compress(self) -> "Qube": - # First compress the children + # First compress the children (this recursively compresses all the way to the leaves) new_children = [child.compress() for child in self.children] - # Now take the set of new children and see if any have identical key, metadata and children - # the values may different and will be collapsed into a single node - identical_children = defaultdict(set) - for child in new_children: - # only care about the key and children of each node, ignore values - key = hash((child.key, tuple((cc.structural_hash for cc in child.children)))) - identical_children[key].add(child) - - # Now go through and create new compressed nodes for any groups that need collapsing - new_children = [] - for child_set in identical_children.values(): - if len(child_set) > 1: - child_set = list(child_set) - key = child_set[0].key - - # Compress the children into a single node - assert all(isinstance(child.data.values, QEnum) for child in child_set), "All children must have QEnum values" - - node_data = NodeData( - key = key, - metadata = frozendict(), # Todo: Implement metadata compression - values = QEnum((v for child in child_set for v in child.data.values.values)), - ) - new_child = Qube(data = node_data, children = child_set[0].children) - else: - # If the group is size one just keep it - new_child = child_set.pop() - - new_children.append(new_child) + # Now compress the set of children at this level + new_children = set_operations.compress_children(new_children) + # Return the now compressed node return Qube( data = self.data, children = tuple(sorted(new_children)) diff --git a/src/python/qubed/set_operations.py b/src/python/qubed/set_operations.py index 1699ae7..8e4b1b7 100644 --- a/src/python/qubed/set_operations.py +++ b/src/python/qubed/set_operations.py @@ -1,10 +1,12 @@ -import dataclasses from collections import defaultdict +from dataclasses import replace from enum import Enum # Prevent circular imports while allowing the type checker to know what Qube is from typing import TYPE_CHECKING, Iterable +from frozendict import frozendict + from .node_types import NodeData from .value_types import QEnum, Values @@ -48,28 +50,82 @@ def operation(A: "Qube", B : "Qube", operation_type: SetOperation) -> "Qube": for key, (A_nodes, B_nodes) in nodes_by_key.items(): new_children.extend(_operation(key, A_nodes, B_nodes, operation_type)) + # Whenever we modify children we should recompress them + # But since `operation` is already recursive, we only need to compress this level not all levels + # Hence we use the non-recursive _compress method + new_children = compress_children(new_children) + # The values and key are the same so we just replace the children - return dataclasses.replace(A, children=new_children) + return replace(A, children=new_children) # The root node is special so we need a helper method that we can recurse on def _operation(key: str, A: list["Qube"], B : list["Qube"], operation_type: SetOperation) -> Iterable["Qube"]: + # Iterate over all pairs (node_A, node_B) for node_a in A: for node_b in B: + + # Compute A - B, A & B, B - A just_A, intersection, just_B = fused_set_operations( node_a.values, node_b.values ) - for values in just_A: - data = NodeData(key, values, {}) - yield type(node_a)(data, node_a.children) + keep_just_A, keep_intersection, keep_just_B = operation_type.value - if intersection: - intersected_children = operation(node_a, node_b, operation_type) - for values in intersection: - data = NodeData(key, values, {}) - yield type(node_a)(data, intersected_children) + # Values in just_A and just_B are simple because + # we can just make new nodes that copy the children of node_A or node_B + if keep_just_A: + for group in just_A: + data = NodeData(key, group, {}) + yield type(node_a)(data, node_a.children) - for values in just_B: - data = NodeData(key, values, {}) - yield type(node_a)(data, node_b.children) \ No newline at end of file + if keep_just_B: + for group in just_B: + data = NodeData(key, group, {}) + yield type(node_a)(data, node_b.children) + + if keep_intersection: + for group in intersection: + if group: + new_node_a = replace(node_a, data = replace(node_a.data, values = group)) + new_node_b = replace(node_b, data= replace(node_b.data, values = group)) + yield operation(new_node_a, new_node_b, operation_type) + +def compress_children(children: Iterable["Qube"]) -> tuple["Qube"]: + """ + Helper method tht only compresses a set of nodes, and doesn't do it recursively. + Used in Qubed.compress but also to maintain compression in the set operations above. + """ + # Now take the set of new children and see if any have identical key, metadata and children + # the values may different and will be collapsed into a single node + identical_children = defaultdict(set) + for child in children: + # only care about the key and children of each node, ignore values + key = hash((child.key, tuple((cc.structural_hash for cc in child.children)))) + identical_children[key].add(child) + + # Now go through and create new compressed nodes for any groups that need collapsing + new_children = [] + for child_set in identical_children.values(): + if len(child_set) > 1: + child_set = list(child_set) + node_type = type(child_set[0]) + key = child_set[0].key + + # Compress the children into a single node + assert all(isinstance(child.data.values, QEnum) for child in child_set), "All children must have QEnum values" + + node_data = NodeData( + key = key, + metadata = frozendict(), # Todo: Implement metadata compression + values = QEnum((v for child in child_set for v in child.data.values.values)), + ) + new_child = node_type(data = node_data, children = child_set[0].children) + else: + # If the group is size one just keep it + new_child = child_set.pop() + + new_children.append(new_child) + return tuple(sorted(new_children, + key = lambda n : ((n.key, tuple(sorted(n.values.values)))) + )) \ No newline at end of file diff --git a/src/python/qubed/value_types.py b/src/python/qubed/value_types.py index 6557514..420a5e5 100644 --- a/src/python/qubed/value_types.py +++ b/src/python/qubed/value_types.py @@ -21,6 +21,10 @@ class Values(ABC): @abstractmethod def from_strings(self, values: Iterable[str]) -> list['Values']: pass + + @abstractmethod + def min(self): + pass T = TypeVar("T") EnumValuesType = FrozenSet[T] @@ -50,6 +54,8 @@ class QEnum(Values): return value in self.values def from_strings(self, values: Iterable[str]) -> list['Values']: return [type(self)(tuple(values))] + def min(self): + return min(self.values) @dataclass(frozen=True) class Range(Values, ABC): diff --git a/tests/test_basic_operations.py b/tests/test_basic_operations.py index 82c5ac5..8f87e8f 100644 --- a/tests/test_basic_operations.py +++ b/tests/test_basic_operations.py @@ -27,14 +27,42 @@ def test_n_leaves(): assert q.n_leaves == 27 + 1 -# def test_union(): -# q = Qube.from_dict({"a=1/2/3" : {"b=1" : {}},}) -# r = Qube.from_dict({"a=2/3/4" : {"b=2" : {}},}) +def test_union(): + q = Qube.from_dict({"a=1/2/3" : {"b=1" : {}},}) + r = Qube.from_dict({"a=2/3/4" : {"b=2" : {}},}) -# u = Qube.from_dict({ -# "a=1" : {"b=1" : {}}, -# "a=1/2/3" : {"b=1/2" : {}}, -# "a=4" : {"b=2" : {}}, -# }) + u = Qube.from_dict({ + "a=4" : {"b=2" : {}}, + "a=1" : {"b=1" : {}}, + "a=2/3" : {"b=1/2" : {}}, -# assert q | r == u \ No newline at end of file + }) + + assert q | r == u + +def test_difference(): + q = Qube.from_dict({"a=1/2/3/5" : {"b=1" : {}},}) + r = Qube.from_dict({"a=2/3/4" : {"b=1" : {}},}) + + i = Qube.from_dict({ + "a=1/5" : {"b=1" : {}}, + + }) + + assert q - r == i + +def test_order_independence(): + u = Qube.from_dict({ + "a=4" : {"b=2" : {}}, + "a=1" : {"b=2" : {}, "b=1" : {}}, + "a=2/3" : {"b=1/2" : {}}, + + }) + + v = Qube.from_dict({ + "a=2/3" : {"b=1/2" : {}}, + "a=4" : {"b=2" : {}}, + "a=1" : {"b=1" : {}, "b=2" : {}}, + }) + + assert u == v \ No newline at end of file diff --git a/tests/test_compression.py b/tests/test_compression.py new file mode 100644 index 0000000..79bea70 --- /dev/null +++ b/tests/test_compression.py @@ -0,0 +1,29 @@ +from qubed import Qube + + +def test_smoke(): + q = Qube.from_dict({ + "class=od" : { + "expver=0001": {"param=1":{}, "param=2":{}}, + "expver=0002": {"param=1":{}, "param=2":{}}, + }, + "class=rd" : { + "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}}, + "expver=0002": {"param=1":{}, "param=2":{}}, + }, + }) + + # root + # ├── class=od, expver=0001/0002, param=1/2 + # └── class=rd + # ├── expver=0001, param=1/2/3 + # └── expver=0002, param=1/2 + ct = Qube.from_dict({ + "class=od" : {"expver=0001/0002": {"param=1/2":{}}}, + "class=rd" : { + "expver=0001": {"param=1/2/3":{}}, + "expver=0002": {"param=1/2":{}}, + }, + }) + + assert q.compress() == ct \ No newline at end of file diff --git a/tests/test_smoke.py b/tests/test_smoke.py index f384ac8..79bea70 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -11,4 +11,19 @@ def test_smoke(): "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}}, "expver=0002": {"param=1":{}, "param=2":{}}, }, - }) \ No newline at end of file + }) + + # root + # ├── class=od, expver=0001/0002, param=1/2 + # └── class=rd + # ├── expver=0001, param=1/2/3 + # └── expver=0002, param=1/2 + ct = Qube.from_dict({ + "class=od" : {"expver=0001/0002": {"param=1/2":{}}}, + "class=rd" : { + "expver=0001": {"param=1/2/3":{}}, + "expver=0002": {"param=1/2":{}}, + }, + }) + + assert q.compress() == ct \ No newline at end of file