add remove_by_key and improve compression
This commit is contained in:
parent
2e36db4268
commit
70b1fd65e5
@ -1,4 +1,5 @@
|
||||
import dataclasses
|
||||
import functools
|
||||
from collections import defaultdict
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass
|
||||
@ -122,6 +123,13 @@ class Qube:
|
||||
|
||||
return Qube.root_node(list(from_dict(d)))
|
||||
|
||||
def to_dict(self) -> dict:
    """Return the tree below this node as nested dictionaries.

    Every descendant becomes one entry whose key is ``"<key>=<v1>,<v2>,..."``
    (the node's values passed through ``str`` and joined with commas) and
    whose value is the dictionary built from that descendant's children.
    A node without children maps to an empty dict. This node's own key and
    values are not part of the result; only its children are.

    NOTE(review): values are joined with "," here, while the dictionaries
    used elsewhere in this change separate values with "/" -- confirm
    whether the output is meant to be fed back into ``from_dict``.
    """

    def entry(node: "Qube") -> tuple[str, dict]:
        # Build the "<key>=<values>" label plus the nested dict for one node.
        joined_values = ",".join(str(v) for v in node.values.values)
        subtree = {}
        for child in node.children:
            label, nested = entry(child)
            # Siblings that render to the same label overwrite each other.
            subtree[label] = nested
        return f"{node.key}={joined_values}", subtree

    _own_label, tree = entry(self)
    return tree
|
||||
|
||||
@classmethod
|
||||
def from_tree(cls, tree_str):
|
||||
lines = tree_str.splitlines()
|
||||
@ -283,17 +291,20 @@ class Qube:
|
||||
else:
|
||||
yield leaf, metadata
|
||||
|
||||
def datacubes(self) -> "Qube":
|
||||
def to_list_of_cubes(node: Qube) -> Iterable[Qube]:
|
||||
if not node.children:
|
||||
yield node
|
||||
# print(node.key)
|
||||
def datacubes(self) -> Iterable[dict[str, Any | list[Any]]]:
|
||||
def to_list_of_cubes(node: Qube) -> Iterable[dict[str, Any | list[Any]]]:
|
||||
if node.key == "root":
|
||||
for c in node.children:
|
||||
# print(c)
|
||||
for sub_cube in to_list_of_cubes(c):
|
||||
yield node.replace(children=[sub_cube])
|
||||
yield from to_list_of_cubes(c)
|
||||
|
||||
return Qube.root_node((q for c in self.children for q in to_list_of_cubes(c)))
|
||||
if not node.children:
|
||||
yield {node.key: list(node.values.values)}
|
||||
|
||||
for c in node.children:
|
||||
for sub_cube in to_list_of_cubes(c):
|
||||
yield {node.key: list(node.values.values)} | sub_cube
|
||||
|
||||
return to_list_of_cubes(self)
|
||||
|
||||
def __getitem__(self, args) -> "Qube":
|
||||
if isinstance(args, str):
|
||||
@ -354,6 +365,22 @@ class Qube:
|
||||
children = tuple(cc for c in self.children for cc in transform(c))
|
||||
return self.replace(children=children)
|
||||
|
||||
def remove_by_key(self, keys: str | list[str]):
|
||||
_keys: list[str] = keys if isinstance(keys, list) else [keys]
|
||||
|
||||
def remove_key(node: "Qube") -> "Qube":
|
||||
children = []
|
||||
for c in node.children:
|
||||
if c.key in _keys:
|
||||
grandchildren = tuple(sorted(remove_key(cc) for cc in c.children))
|
||||
children.extend(grandchildren)
|
||||
else:
|
||||
children.append(remove_key(c))
|
||||
|
||||
return node.replace(children=tuple(sorted(children)))
|
||||
|
||||
return remove_key(self).compress()
|
||||
|
||||
def convert_dtypes(self, converters: dict[str, Callable[[Any], Any]]):
|
||||
def convert(node: Qube) -> Qube:
|
||||
if node.key in converters:
|
||||
@ -474,11 +501,25 @@ class Qube:
|
||||
return hash_node(self)
|
||||
|
||||
def compress(self) -> "Qube":
|
||||
# First compress the children (this recursively compresses all the way to the leaves)
|
||||
new_children = [child.compress() for child in self.children]
|
||||
"""
|
||||
This method is quite computationally heavy because of trees like this:
|
||||
root, class=d1, generation=1
|
||||
├── time=0600, many identical keys, param=8,78,79
|
||||
├── time=0600, many identical keys, param=8,78,79
|
||||
└── time=0600, many identical keys, param=8,78,79
|
||||
This tree compresses down
|
||||
|
||||
# Now compress the set of children at this level
|
||||
new_children = set_operations.compress_children(new_children)
|
||||
"""
|
||||
|
||||
# Return the now compressed node
|
||||
return Qube.make(self.key, self.values, new_children)
|
||||
def union(a: "Qube", b: "Qube") -> "Qube":
|
||||
b = type(self).root_node(children=(b,))
|
||||
out = set_operations.operation(
|
||||
a, b, set_operations.SetOperation.UNION, type(self)
|
||||
)
|
||||
return out
|
||||
|
||||
new_children = [c.compress() for c in self.children]
|
||||
if len(new_children) > 1:
|
||||
new_children = functools.reduce(union, new_children, Qube.empty()).children
|
||||
|
||||
return self.replace(children=tuple(sorted(new_children)))
|
||||
|
@ -168,3 +168,12 @@ def compress_children(children: Iterable["Qube"]) -> tuple["Qube"]:
|
||||
|
||||
new_children.append(new_child)
|
||||
return tuple(sorted(new_children, key=lambda n: ((n.key, n.values.min()))))
|
||||
|
||||
|
||||
def union(a: "Qube", b: "Qube") -> "Qube":
    """Return the union of `a` and `b`.

    Delegates to ``operation`` with ``SetOperation.UNION``, passing the
    concrete type of `a` as the final argument.
    """
    node_type = type(a)
    return operation(a, b, SetOperation.UNION, node_type)
|
||||
|
@ -34,7 +34,7 @@ def summarize_node(
|
||||
while True:
|
||||
summary = node.summary(**kwargs)
|
||||
if "is_leaf" in node.metadata and node.metadata["is_leaf"]:
|
||||
summary += "🌿"
|
||||
summary += " 🌿"
|
||||
paths.append(summary)
|
||||
if len(summary) > max_summary_length:
|
||||
summary = summary[:max_summary_length] + "..."
|
||||
|
@ -15,19 +15,62 @@ def test_smoke():
|
||||
}
|
||||
)
|
||||
|
||||
# root
|
||||
# ├── class=od, expver=0001/0002, param=1/2
|
||||
# └── class=rd
|
||||
# ├── expver=0001, param=1/2/3
|
||||
# └── expver=0002, param=1/2
|
||||
ct = Qube.from_dict(
|
||||
ct = Qube.from_tree("""
|
||||
root
|
||||
├── class=od, expver=0001/0002, param=1/2
|
||||
└── class=rd
|
||||
├── expver=0001, param=1/2/3
|
||||
└── expver=0002, param=1/2
|
||||
""")
|
||||
|
||||
assert q.compress() == ct
|
||||
|
||||
|
||||
def test_2():
|
||||
qube = Qube.from_dict(
|
||||
{
|
||||
"class=od": {"expver=0001/0002": {"param=1/2": {}}},
|
||||
"class=rd": {
|
||||
"expver=0001": {"param=1/2/3": {}},
|
||||
"expver=0002": {"param=1/2": {}},
|
||||
},
|
||||
"class=d1": {
|
||||
"generation=1": {
|
||||
"date=20240728": {"time=0600": {"param=8/78/79": {}}},
|
||||
"date=20240828": {"time=0600": {"param=8/78/79": {}}},
|
||||
"date=20240928": {"time=0600": {"param=8/78/79": {}}},
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
assert q.compress() == ct
|
||||
target = Qube.from_datacube(
|
||||
{
|
||||
"class": "d1",
|
||||
"generation": "1",
|
||||
"date": ["20240728", "20240828", "20240928"],
|
||||
"time": "0600",
|
||||
"param": ["8", "78", "79"],
|
||||
}
|
||||
)
|
||||
assert qube.compress() == target
|
||||
|
||||
|
||||
def test_removal_compression():
    """Removing the ``month`` level should leave a single compressed datacube."""
    # Three branches that differ only in month and date below class/generation.
    per_month = {
        "month=07": {"date=20240728": {"time=0600": {"param=8/78/79": {}}}},
        "month=08": {"date=20240828": {"time=0600": {"param=8/78/79": {}}}},
        "month=09": {"date=20240928": {"time=0600": {"param=8/78/79": {}}}},
    }
    qube = Qube.from_dict({"class=d1": {"generation=1": per_month}})

    # Expected outcome: the same data with no month level, dates merged.
    expected = Qube.from_datacube(
        {
            "class": "d1",
            "generation": "1",
            "date": ["20240728", "20240828", "20240928"],
            "time": "0600",
            "param": ["8", "78", "79"],
        }
    )

    pruned = qube.remove_by_key(["month"])
    assert pruned == expected
|
||||
|
Loading…
x
Reference in New Issue
Block a user