progress on metadata
This commit is contained in:
parent
a85b700084
commit
110046b251
@ -49,9 +49,15 @@ class Qube:
|
|||||||
def metadata(self):
|
def metadata(self):
|
||||||
return self.data.metadata
|
return self.data.metadata
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dtype(self):
|
||||||
|
return self.data.dtype
|
||||||
|
|
||||||
def replace(self, **kwargs) -> Qube:
|
def replace(self, **kwargs) -> Qube:
|
||||||
data_keys = {
|
data_keys = {
|
||||||
k: v for k, v in kwargs.items() if k in ["key", "values", "metadata"]
|
k: v
|
||||||
|
for k, v in kwargs.items()
|
||||||
|
if k in ["key", "values", "metadata", "dtype"]
|
||||||
}
|
}
|
||||||
node_keys = {k: v for k, v in kwargs.items() if k == "children"}
|
node_keys = {k: v for k, v in kwargs.items() if k == "children"}
|
||||||
if not data_keys and not node_keys:
|
if not data_keys and not node_keys:
|
||||||
@ -69,7 +75,9 @@ class Qube:
|
|||||||
@classmethod
def make(cls, key: str, values: ValueGroup, children, **kwargs) -> Qube:
    """
    Build a node from a key, its values and its children.

    Args:
        key: The key of the new node.
        values: The value group stored on the node.
        children: Iterable of child nodes; stored as a tuple sorted by
            ``(child.key, child.values.min())``.
        **kwargs: Only ``metadata`` is read. It may be any mapping, or be
            omitted / ``None`` to mean "no metadata".

    Returns:
        A new instance of ``cls`` wrapping a ``NodeData`` and the sorted children.
    """
    # `kwargs.get("metadata", default)` only falls back when the key is absent;
    # an explicit `metadata=None` would otherwise be handed to frozendict(),
    # which (like dict) is expected to reject a non-mapping argument.
    metadata = frozendict(kwargs.get("metadata") or {})

    # Keep siblings in a deterministic order.
    ordered_children = tuple(
        sorted(children, key=lambda n: (n.key, n.values.min()))
    )

    return cls(
        data=NodeData(key, values, metadata=metadata),
        children=ordered_children,
    )
|
||||||
|
|
||||||
@ -217,11 +225,17 @@ class Qube:
|
|||||||
if name is not None
|
if name is not None
|
||||||
else self
|
else self
|
||||||
)
|
)
|
||||||
return "".join(node_tree_to_string(node=node, depth=depth))
|
out = "".join(node_tree_to_string(node=node, depth=depth))
|
||||||
|
if out[-1] == "\n":
|
||||||
|
out = out[:-1]
|
||||||
|
return out
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.__str_helper__()
|
return self.__str_helper__()
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"Qube({self.__str_helper__()})"
|
||||||
|
|
||||||
def print(self, depth=None, name: str | None = None):
|
def print(self, depth=None, name: str | None = None):
|
||||||
print(self.__str_helper__(depth=depth, name=name))
|
print(self.__str_helper__(depth=depth, name=name))
|
||||||
|
|
||||||
@ -409,7 +423,8 @@ class Qube:
|
|||||||
def convert(node: Qube) -> Qube:
    """
    Return ``node`` with its values passed through the converter registered
    for ``node.key`` in ``converters``; nodes without a converter are
    returned unchanged.

    When a conversion happens, the node's dtype is set to the type of the
    first converted value.
    """
    if node.key not in converters:
        return node

    converter = converters[node.key]
    values = [converter(v) for v in node.values]

    if not values:
        # No converted value to infer a dtype from: replace the values only
        # instead of failing on values[0].
        # NOTE(review): presumably `replace` keeps the existing dtype when
        # none is passed — confirm against `Qube.replace`.
        return node.replace(values=QEnum(values))

    return node.replace(values=QEnum(values), dtype=type(values[0]))
|
||||||
|
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import TYPE_CHECKING, Iterator
|
from typing import TYPE_CHECKING, Iterator
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -13,7 +15,7 @@ def make_node(
|
|||||||
key: str,
|
key: str,
|
||||||
values: Iterator,
|
values: Iterator,
|
||||||
shape: list[int],
|
shape: list[int],
|
||||||
children: "tuple[Qube]",
|
children: tuple[Qube, ...],
|
||||||
metadata: dict[str, np.ndarray] | None = None,
|
metadata: dict[str, np.ndarray] | None = None,
|
||||||
):
|
):
|
||||||
return cls.make(
|
return cls.make(
|
||||||
@ -30,11 +32,11 @@ def from_nodes(cls, nodes, add_root=True):
|
|||||||
shape = [len(n["values"]) for n in nodes.values()]
|
shape = [len(n["values"]) for n in nodes.values()]
|
||||||
nodes = nodes.items()
|
nodes = nodes.items()
|
||||||
*nodes, (key, info) = nodes
|
*nodes, (key, info) = nodes
|
||||||
root = make_node(shape=shape, children=(), key=key, **info)
|
root = make_node(cls, shape=shape, children=(), key=key, **info)
|
||||||
|
|
||||||
for key, info in reversed(nodes):
|
for key, info in reversed(nodes):
|
||||||
shape.pop()
|
shape.pop()
|
||||||
root = make_node(shape=shape, children=(root,), key=key, **info)
|
root = make_node(cls, shape=shape, children=(root,), key=key, **info)
|
||||||
|
|
||||||
if add_root:
|
if add_root:
|
||||||
return cls.root_node(children=(root,))
|
return cls.root_node(children=(root,))
|
||||||
|
@ -13,6 +13,7 @@ class NodeData:
|
|||||||
metadata: frozendict[str, np.ndarray] = field(
|
metadata: frozendict[str, np.ndarray] = field(
|
||||||
default_factory=lambda: frozendict({}), compare=False
|
default_factory=lambda: frozendict({}), compare=False
|
||||||
)
|
)
|
||||||
|
dtype: type = str
|
||||||
|
|
||||||
def summary(self) -> str:
|
def summary(self) -> str:
|
||||||
return f"{self.key}={self.values.summary()}" if self.key != "root" else "root"
|
return f"{self.key}={self.values.summary()}" if self.key != "root" else "root"
|
||||||
|
@ -40,7 +40,6 @@ def QEnum_intersection(
|
|||||||
|
|
||||||
for index_a, val_A in enumerate(A.values):
|
for index_a, val_A in enumerate(A.values):
|
||||||
if val_A in B.values:
|
if val_A in B.values:
|
||||||
# print(f"{val_A} in both")
|
|
||||||
just_B.pop(val_A)
|
just_B.pop(val_A)
|
||||||
intersection[val_A] = (
|
intersection[val_A] = (
|
||||||
index_a # We throw away any overlapping metadata from B
|
index_a # We throw away any overlapping metadata from B
|
||||||
@ -116,9 +115,8 @@ def operation(A: Qube, B: Qube, operation_type: SetOperation, node_type) -> Qube
|
|||||||
|
|
||||||
# For every node group, perform the set operation
|
# For every node group, perform the set operation
|
||||||
for key, (A_nodes, B_nodes) in nodes_by_key.items():
|
for key, (A_nodes, B_nodes) in nodes_by_key.items():
|
||||||
new_children.extend(
|
output = list(_operation(key, A_nodes, B_nodes, operation_type, node_type))
|
||||||
_operation(key, A_nodes, B_nodes, operation_type, node_type)
|
new_children.extend(output)
|
||||||
)
|
|
||||||
|
|
||||||
# Whenever we modify children we should recompress them
|
# Whenever we modify children we should recompress them
|
||||||
# But since `operation` is already recursive, we only need to compress this level not all levels
|
# But since `operation` is already recursive, we only need to compress this level not all levels
|
||||||
@ -193,17 +191,17 @@ def compress_children(children: Iterable[Qube]) -> tuple[Qube, ...]:
|
|||||||
"""
|
"""
|
||||||
# Take the set of new children and see if any have identical key, metadata and children
|
# Take the set of new children and see if any have identical key, metadata and children
|
||||||
# the values may differ and will be collapsed into a single node
|
# the values may differ and will be collapsed into a single node
|
||||||
identical_children = defaultdict(set)
|
|
||||||
|
identical_children = defaultdict(list)
|
||||||
for child in children:
|
for child in children:
|
||||||
# only care about the key and children of each node, ignore values
|
# only care about the key and children of each node, ignore values
|
||||||
h = hash((child.key, tuple((cc.structural_hash for cc in child.children))))
|
h = hash((child.key, tuple((cc.structural_hash for cc in child.children))))
|
||||||
identical_children[h].add(child)
|
identical_children[h].append(child)
|
||||||
|
|
||||||
# Now go through and create new compressed nodes for any groups that need collapsing
|
# Now go through and create new compressed nodes for any groups that need collapsing
|
||||||
new_children = []
|
new_children = []
|
||||||
for child_set in identical_children.values():
|
for child_list in identical_children.values():
|
||||||
if len(child_set) > 1:
|
if len(child_list) > 1:
|
||||||
child_list = list(child_set)
|
|
||||||
example = child_list[0]
|
example = child_list[0]
|
||||||
node_type = type(example)
|
node_type = type(example)
|
||||||
key = child_list[0].key
|
key = child_list[0].key
|
||||||
@ -217,9 +215,10 @@ def compress_children(children: Iterable[Qube]) -> tuple[Qube, ...]:
|
|||||||
k: [child.metadata[k] for child in child_list]
|
k: [child.metadata[k] for child in child_list]
|
||||||
for k in example.metadata.keys()
|
for k in example.metadata.keys()
|
||||||
}
|
}
|
||||||
|
|
||||||
metadata: frozendict[str, np.ndarray] = frozendict(
|
metadata: frozendict[str, np.ndarray] = frozendict(
|
||||||
{
|
{
|
||||||
k: np.concatenate(metadata_group, axis=-1)
|
k: np.concatenate(metadata_group, axis=0)
|
||||||
for k, metadata_group in metadata_groups.items()
|
for k, metadata_group in metadata_groups.items()
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@ -227,12 +226,14 @@ def compress_children(children: Iterable[Qube]) -> tuple[Qube, ...]:
|
|||||||
node_data = NodeData(
|
node_data = NodeData(
|
||||||
key=key,
|
key=key,
|
||||||
metadata=metadata,
|
metadata=metadata,
|
||||||
values=QEnum((v for child in child_list for v in child.data.values)),
|
values=QEnum(set(v for child in child_list for v in child.data.values)),
|
||||||
)
|
)
|
||||||
new_child = node_type(data=node_data, children=child_list[0].children)
|
children = [cc for c in child_list for cc in c.children]
|
||||||
|
compressed_children = compress_children(children)
|
||||||
|
new_child = node_type(data=node_data, children=compressed_children)
|
||||||
else:
|
else:
|
||||||
# If the group is size one just keep it
|
# If the group is size one just keep it
|
||||||
new_child = child_set.pop()
|
new_child = child_list.pop()
|
||||||
|
|
||||||
new_children.append(new_child)
|
new_children.append(new_child)
|
||||||
|
|
||||||
|
@ -4,6 +4,8 @@ import random
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import TYPE_CHECKING, Iterable
|
from typing import TYPE_CHECKING, Iterable
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from .Qube import Qube
|
from .Qube import Qube
|
||||||
|
|
||||||
@ -68,17 +70,51 @@ def node_tree_to_string(node: Qube, prefix: str = "", depth=None) -> Iterable[st
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_node_html(
    node: Qube, collapse=False, max_summary_length=50, **kwargs
) -> tuple[str, Qube]:
    """
    Render a node (and optionally its chain of single children) as HTML spans.

    Each visited node becomes one
    ``<span class="qubed-node" data-path="..." title="...">text</span>``:
    ``data-path`` holds the untruncated ``node.summary(**kwargs)``, ``title``
    holds the dtype name and the shape of every metadata entry, and the text
    is the summary truncated to ``max_summary_length`` characters plus "...".

    Args:
        node: The node to start from.
        collapse: If true, keep descending while the current node has exactly
            one child, emitting one span per visited node.
        max_summary_length: Maximum length of the visible text before truncation.
        **kwargs: Forwarded to ``node.summary``.

    Returns:
        ``(html, last_node)``: the spans joined with ", " and the last node
        visited. That is ``node`` itself when ``collapse`` is false, otherwise
        the first node in the chain whose number of children is not one.
    """
    # Function-scope import so this block needs no new module-level import.
    import html

    summaries = []

    while True:
        path = node.summary(**kwargs)
        summary = path
        if "is_leaf" in node.metadata and node.metadata["is_leaf"]:
            summary += " 🌿"

        # Truncate before escaping so an HTML entity is never cut in half.
        if len(summary) > max_summary_length:
            summary = summary[:max_summary_length] + "..."

        shapes = {k: np.shape(v) for k, v in node.metadata.items()}
        info = f"dtype: {node.dtype.__name__}\nmetadata: {shapes}\n"

        # Keys, values and metadata keys are data, not markup: escape them so
        # a quote or angle bracket cannot break out of the attribute or span.
        summaries.append(
            f'<span class="qubed-node" data-path="{html.escape(path)}" '
            f'title="{html.escape(info)}">{html.escape(summary)}</span>'
        )

        # Move down only when collapsing and there is exactly one child.
        if not collapse or len(node.children) != 1:
            break
        node = node.children[0]

    return ", ".join(summaries), node
|
||||||
|
|
||||||
|
|
||||||
def _node_tree_to_html(
|
def _node_tree_to_html(
|
||||||
node: Qube, prefix: str = "", depth=1, connector="", **kwargs
|
node: Qube, prefix: str = "", depth=1, connector="", **kwargs
|
||||||
) -> Iterable[str]:
|
) -> Iterable[str]:
|
||||||
summary, path, node = summarize_node(node, **kwargs)
|
summary, node = summarize_node_html(node, **kwargs)
|
||||||
|
|
||||||
if len(node.children) == 0:
|
if len(node.children) == 0:
|
||||||
yield f'<span class="qubed-node leaf" data-path="{path}">{connector}{summary}</span>'
|
yield f'<span class="qubed-level">{connector}{summary}</span>'
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
open = "open" if depth > 0 else ""
|
open = "open" if depth > 0 else ""
|
||||||
yield f'<details {open} data-path="{path}"><summary class="qubed-node">{connector}{summary}</summary>'
|
yield f'<details {open}><summary class="qubed-level">{connector}{summary}</summary>'
|
||||||
|
|
||||||
for index, child in enumerate(node.children):
|
for index, child in enumerate(node.children):
|
||||||
connector = "└── " if index == len(node.children) - 1 else "├── "
|
connector = "└── " if index == len(node.children) - 1 else "├── "
|
||||||
@ -114,7 +150,7 @@ def node_tree_to_html(
|
|||||||
margin-left: 0;
|
margin-left: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.qubed-node a {
|
.qubed-level a {
|
||||||
margin-left: 10px;
|
margin-left: 10px;
|
||||||
text-decoration: none;
|
text-decoration: none;
|
||||||
}
|
}
|
||||||
@ -128,7 +164,7 @@ def node_tree_to_html(
|
|||||||
display: block;
|
display: block;
|
||||||
}
|
}
|
||||||
|
|
||||||
summary:hover,span.leaf:hover {
|
span.qubed-node:hover {
|
||||||
background-color: #f0f0f0;
|
background-color: #f0f0f0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -140,7 +176,7 @@ def node_tree_to_html(
|
|||||||
content: " ▼";
|
content: " ▼";
|
||||||
}
|
}
|
||||||
|
|
||||||
.leaf {
|
.qubed-level {
|
||||||
text-overflow: ellipsis;
|
text-overflow: ellipsis;
|
||||||
overflow: hidden;
|
overflow: hidden;
|
||||||
text-wrap: nowrap;
|
text-wrap: nowrap;
|
||||||
|
@ -21,7 +21,7 @@ root
|
|||||||
""".strip()
|
""".strip()
|
||||||
|
|
||||||
as_html = """
|
as_html = """
|
||||||
<details open data-path="root"><summary class="qubed-node">root</summary><span class="qubed-node leaf" data-path="class=od,expver=0001/0002,param=1/2">├── class=od, expver=0001/0002, param=1/2</span><details open data-path="class=rd"><summary class="qubed-node">└── class=rd</summary><span class="qubed-node leaf" data-path="expver=0001,param=1/2/3"> ├── expver=0001, param=1/2/3</span><span class="qubed-node leaf" data-path="expver=0002,param=1/2"> └── expver=0002, param=1/2</span></details></details>
|
<details open><summary class="qubed-level"><span class="qubed-node" data-path="root" title="dtype: str\nmetadata: {}\n">root</span></summary><span class="qubed-level">├── <span class="qubed-node" data-path="class=od" title="dtype: str\nmetadata: {}\n">class=od</span>, <span class="qubed-node" data-path="expver=0001/0002" title="dtype: str\nmetadata: {}\n">expver=0001/0002</span>, <span class="qubed-node" data-path="param=1/2" title="dtype: str\nmetadata: {}\n">param=1/2</span></span><details open><summary class="qubed-level">└── <span class="qubed-node" data-path="class=rd" title="dtype: str\nmetadata: {}\n">class=rd</span></summary><span class="qubed-level"> ├── <span class="qubed-node" data-path="expver=0001" title="dtype: str\nmetadata: {}\n">expver=0001</span>, <span class="qubed-node" data-path="param=1/2/3" title="dtype: str\nmetadata: {}\n">param=1/2/3</span></span><span class="qubed-level"> └── <span class="qubed-node" data-path="expver=0002" title="dtype: str\nmetadata: {}\n">expver=0002</span>, <span class="qubed-node" data-path="param=1/2" title="dtype: str\nmetadata: {}\n">param=1/2</span></span></details></details>
|
||||||
""".strip()
|
""".strip()
|
||||||
|
|
||||||
|
|
||||||
|
@ -0,0 +1,45 @@
|
|||||||
|
from frozendict import frozendict
|
||||||
|
from qubed import Qube
|
||||||
|
|
||||||
|
|
||||||
|
def make_set(entries):
    """Freeze each ``(a, b)`` pair of mappings in ``entries`` and collect the pairs into a set."""
    return {(frozendict(first), frozendict(second)) for first, second in entries}
|
||||||
|
|
||||||
|
|
||||||
|
def test_simple_union():
    """The union of two qubes with disjoint `class` values equals the qube built directly from the combined values and metadata."""

    def build(classes, numbers):
        # All three qubes share the same expver/stream layout; only the
        # class values and the `number` metadata on `stream` differ.
        return Qube.from_nodes(
            {
                "class": dict(values=classes),
                "expver": dict(values=[1, 2]),
                "stream": dict(
                    values=["a", "b", "c"], metadata=dict(number=list(numbers))
                ),
            }
        )

    q = build(["od", "rd"], range(12))
    r = build(["xd"], range(12, 18))
    expected_union = build(["od", "rd", "xd"], range(18))

    union = q | r

    assert union == expected_union

    # Compare leaves together with their metadata, ignoring order.
    expected_leaves = make_set(expected_union.leaves_with_metadata())
    actual_leaves = make_set(union.leaves_with_metadata())
    assert expected_leaves == actual_leaves
|
Loading…
x
Reference in New Issue
Block a user