add remove_by_key and improve compression
This commit is contained in:
parent
2e36db4268
commit
70b1fd65e5
@ -1,4 +1,5 @@
|
|||||||
import dataclasses
|
import dataclasses
|
||||||
|
import functools
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@ -122,6 +123,13 @@ class Qube:
|
|||||||
|
|
||||||
return Qube.root_node(list(from_dict(d)))
|
return Qube.root_node(list(from_dict(d)))
|
||||||
|
|
||||||
|
def to_dict(self) -> dict:
|
||||||
|
def to_dict(q: "Qube") -> tuple[str, dict]:
|
||||||
|
key = f"{q.key}={','.join(str(v) for v in q.values.values)}"
|
||||||
|
return key, dict(to_dict(c) for c in q.children)
|
||||||
|
|
||||||
|
return to_dict(self)[1]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_tree(cls, tree_str):
|
def from_tree(cls, tree_str):
|
||||||
lines = tree_str.splitlines()
|
lines = tree_str.splitlines()
|
||||||
@ -283,17 +291,20 @@ class Qube:
|
|||||||
else:
|
else:
|
||||||
yield leaf, metadata
|
yield leaf, metadata
|
||||||
|
|
||||||
def datacubes(self) -> "Qube":
|
def datacubes(self) -> Iterable[dict[str, Any | list[Any]]]:
|
||||||
def to_list_of_cubes(node: Qube) -> Iterable[Qube]:
|
def to_list_of_cubes(node: Qube) -> Iterable[dict[str, Any | list[Any]]]:
|
||||||
if not node.children:
|
if node.key == "root":
|
||||||
yield node
|
|
||||||
# print(node.key)
|
|
||||||
for c in node.children:
|
for c in node.children:
|
||||||
# print(c)
|
yield from to_list_of_cubes(c)
|
||||||
for sub_cube in to_list_of_cubes(c):
|
|
||||||
yield node.replace(children=[sub_cube])
|
|
||||||
|
|
||||||
return Qube.root_node((q for c in self.children for q in to_list_of_cubes(c)))
|
if not node.children:
|
||||||
|
yield {node.key: list(node.values.values)}
|
||||||
|
|
||||||
|
for c in node.children:
|
||||||
|
for sub_cube in to_list_of_cubes(c):
|
||||||
|
yield {node.key: list(node.values.values)} | sub_cube
|
||||||
|
|
||||||
|
return to_list_of_cubes(self)
|
||||||
|
|
||||||
def __getitem__(self, args) -> "Qube":
|
def __getitem__(self, args) -> "Qube":
|
||||||
if isinstance(args, str):
|
if isinstance(args, str):
|
||||||
@ -354,6 +365,22 @@ class Qube:
|
|||||||
children = tuple(cc for c in self.children for cc in transform(c))
|
children = tuple(cc for c in self.children for cc in transform(c))
|
||||||
return self.replace(children=children)
|
return self.replace(children=children)
|
||||||
|
|
||||||
|
def remove_by_key(self, keys: str | list[str]):
|
||||||
|
_keys: list[str] = keys if isinstance(keys, list) else [keys]
|
||||||
|
|
||||||
|
def remove_key(node: "Qube") -> "Qube":
|
||||||
|
children = []
|
||||||
|
for c in node.children:
|
||||||
|
if c.key in _keys:
|
||||||
|
grandchildren = tuple(sorted(remove_key(cc) for cc in c.children))
|
||||||
|
children.extend(grandchildren)
|
||||||
|
else:
|
||||||
|
children.append(remove_key(c))
|
||||||
|
|
||||||
|
return node.replace(children=tuple(sorted(children)))
|
||||||
|
|
||||||
|
return remove_key(self).compress()
|
||||||
|
|
||||||
def convert_dtypes(self, converters: dict[str, Callable[[Any], Any]]):
|
def convert_dtypes(self, converters: dict[str, Callable[[Any], Any]]):
|
||||||
def convert(node: Qube) -> Qube:
|
def convert(node: Qube) -> Qube:
|
||||||
if node.key in converters:
|
if node.key in converters:
|
||||||
@ -474,11 +501,25 @@ class Qube:
|
|||||||
return hash_node(self)
|
return hash_node(self)
|
||||||
|
|
||||||
def compress(self) -> "Qube":
|
def compress(self) -> "Qube":
|
||||||
# First compress the children (this recursively compresses all the way to the leaves)
|
"""
|
||||||
new_children = [child.compress() for child in self.children]
|
This method is quite computationally heavy because of trees like this:
|
||||||
|
root, class=d1, generation=1
|
||||||
|
├── time=0600, many identical keys, param=8,78,79
|
||||||
|
├── time=0600, many identical keys, param=8,78,79
|
||||||
|
└── time=0600, many identical keys, param=8,78,79
|
||||||
|
This tree compresses down
|
||||||
|
|
||||||
# Now compress the set of children at this level
|
"""
|
||||||
new_children = set_operations.compress_children(new_children)
|
|
||||||
|
|
||||||
# Return the now compressed node
|
def union(a: "Qube", b: "Qube") -> "Qube":
|
||||||
return Qube.make(self.key, self.values, new_children)
|
b = type(self).root_node(children=(b,))
|
||||||
|
out = set_operations.operation(
|
||||||
|
a, b, set_operations.SetOperation.UNION, type(self)
|
||||||
|
)
|
||||||
|
return out
|
||||||
|
|
||||||
|
new_children = [c.compress() for c in self.children]
|
||||||
|
if len(new_children) > 1:
|
||||||
|
new_children = functools.reduce(union, new_children, Qube.empty()).children
|
||||||
|
|
||||||
|
return self.replace(children=tuple(sorted(new_children)))
|
||||||
|
@ -168,3 +168,12 @@ def compress_children(children: Iterable["Qube"]) -> tuple["Qube"]:
|
|||||||
|
|
||||||
new_children.append(new_child)
|
new_children.append(new_child)
|
||||||
return tuple(sorted(new_children, key=lambda n: ((n.key, n.values.min()))))
|
return tuple(sorted(new_children, key=lambda n: ((n.key, n.values.min()))))
|
||||||
|
|
||||||
|
|
||||||
|
def union(a: "Qube", b: "Qube") -> "Qube":
|
||||||
|
return operation(
|
||||||
|
a,
|
||||||
|
b,
|
||||||
|
SetOperation.UNION,
|
||||||
|
type(a),
|
||||||
|
)
|
||||||
|
@ -15,19 +15,62 @@ def test_smoke():
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# root
|
ct = Qube.from_tree("""
|
||||||
# ├── class=od, expver=0001/0002, param=1/2
|
root
|
||||||
# └── class=rd
|
├── class=od, expver=0001/0002, param=1/2
|
||||||
# ├── expver=0001, param=1/2/3
|
└── class=rd
|
||||||
# └── expver=0002, param=1/2
|
├── expver=0001, param=1/2/3
|
||||||
ct = Qube.from_dict(
|
└── expver=0002, param=1/2
|
||||||
|
""")
|
||||||
|
|
||||||
|
assert q.compress() == ct
|
||||||
|
|
||||||
|
|
||||||
|
def test_2():
|
||||||
|
qube = Qube.from_dict(
|
||||||
{
|
{
|
||||||
"class=od": {"expver=0001/0002": {"param=1/2": {}}},
|
"class=d1": {
|
||||||
"class=rd": {
|
"generation=1": {
|
||||||
"expver=0001": {"param=1/2/3": {}},
|
"date=20240728": {"time=0600": {"param=8/78/79": {}}},
|
||||||
"expver=0002": {"param=1/2": {}},
|
"date=20240828": {"time=0600": {"param=8/78/79": {}}},
|
||||||
},
|
"date=20240928": {"time=0600": {"param=8/78/79": {}}},
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
assert q.compress() == ct
|
target = Qube.from_datacube(
|
||||||
|
{
|
||||||
|
"class": "d1",
|
||||||
|
"generation": "1",
|
||||||
|
"date": ["20240728", "20240828", "20240928"],
|
||||||
|
"time": "0600",
|
||||||
|
"param": ["8", "78", "79"],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
assert qube.compress() == target
|
||||||
|
|
||||||
|
|
||||||
|
def test_removal_compression():
|
||||||
|
qube = Qube.from_dict(
|
||||||
|
{
|
||||||
|
"class=d1": {
|
||||||
|
"generation=1": {
|
||||||
|
"month=07": {"date=20240728": {"time=0600": {"param=8/78/79": {}}}},
|
||||||
|
"month=08": {"date=20240828": {"time=0600": {"param=8/78/79": {}}}},
|
||||||
|
"month=09": {"date=20240928": {"time=0600": {"param=8/78/79": {}}}},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
target = Qube.from_datacube(
|
||||||
|
{
|
||||||
|
"class": "d1",
|
||||||
|
"generation": "1",
|
||||||
|
"date": ["20240728", "20240828", "20240928"],
|
||||||
|
"time": "0600",
|
||||||
|
"param": ["8", "78", "79"],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
assert qube.remove_by_key(["month"]) == target
|
||||||
|
Loading…
x
Reference in New Issue
Block a user