Add alt-click copy of nodes paths, flesh out range types

2025-02-24 11:06:11 +00:00 · 2025-02-24 11:06:11 +00:00 · ef844c9b57
commit ef844c9b57
parent 1f7c5dfecd
6 changed files with 263 additions and 143 deletions
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@ -42,7 +42,34 @@ print(f"{cq.n_leaves = }, {cq.n_nodes = }")
 cq
 ```

-### Quick Tree Construction
+
+With the HTML representation you can click on the leaves to expand them. You can copy a path representation of a node to the clipboard by alt/option/⌥ clicking on it. You can then extract that node in code using `[]`:
+
+```{code-cell} python3
+cq["class=rd,expver=0001"]
+```
+
+Select a subtree:
+
+```{code-cell} python3
+cq["class", "od"]["expver", "0001"]
+```
+
+Intersect with a dense datacube:
+
+```{code-cell} python3
+dq = Qube.from_datacube({
+    "class": ["od", "rd", "cd"],
+    "expver": ["0001", "0002", "0003"],
+    "param": "2",
+})
+
+(cq & dq).print()
+```
+
+
+
+### Tree Construction

 One of the quickest ways to construct non-trivial trees is to use the `Qube.from_datacube` method to construct dense trees and then use the set operations to combine or intersect them:

@ -154,3 +181,11 @@ Symmetric Difference:
 (A ^ B).print();
 ```

+### Transformations
+
+`q.transform` takes a Python function from one node to one or more nodes and uses this to build a new tree. This can be used for simple operations on the key or values but also to split or remove nodes. Note that you can't use it to merge nodes because it's only allowed to see one node at a time.
+
+```{code-cell} python3
+def capitalize(node): return node.replace(key = node.key.capitalize())
+climate_dt.transform(capitalize).html(depth=1)
+```
--- a/src/python/qubed/Qube.py
+++ b/src/python/qubed/Qube.py
@ -8,7 +8,11 @@ from frozendict import frozendict

 from . import set_operations
 from .node_types import NodeData, RootNodeData
-from .tree_formatters import HTML, node_tree_to_html, node_tree_to_string
+from .tree_formatters import (
+    HTML,
+    node_tree_to_html,
+    node_tree_to_string,
+)
 from .value_types import QEnum, Values, values_from_json


@ -28,6 +32,16 @@ class Qube:
    @property
    def metadata(self) -> frozendict[str, Any]:
        return self.data.metadata
+    
+    def replace(self, **kwargs) -> 'Qube':
+        data_keys = {k : v for k, v in kwargs.items() if k in ["key", "values", "metadata"]}
+        node_keys = {k : v for k, v in kwargs.items() if k == "children"}
+        if not data_keys and not node_keys:
+            return self
+        if not data_keys:
+            return dataclasses.replace(self, **node_keys)
+        
+        return dataclasses.replace(self, data = dataclasses.replace(self.data, **data_keys), **node_keys)

    
    def summary(self) -> str:
@ -155,12 +169,28 @@ class Qube:
        return Qube.root_node((q for c in self.children for q in to_list_of_cubes(c)))
    
    def __getitem__(self, args) -> 'Qube':
-        key, value = args
-        for c in self.children:
-            if c.key == key and value in c.values:
-                data = dataclasses.replace(c.data, values = QEnum((value,)))
-                return dataclasses.replace(c, data = data)
-        raise KeyError(f"Key {key} not found in children of {self.key}")
+        if isinstance(args, str):
+            specifiers = args.split(",")
+            current = self
+            for specifier in specifiers:
+                key, values = specifier.split("=")
+                values = values.split("/")
+                for c in current.children:
+                    if c.key == key and set(values) == set(c.values):
+                        current = c
+                        break
+                else:
+                    raise KeyError(f"Key '{key}' not found in children of '{current.key}'")
+            return Qube.root_node(current.children)
+        
+        elif isinstance(args, tuple) and len(args) == 2:
+            key, value = args
+            for c in self.children:
+                if c.key == key and value in c.values:
+                    return Qube.root_node(c.children)
+            raise KeyError(f"Key {key} not found in children of {self.key}")
+        else:
+            raise ValueError("Unknown key type")

    @cached_property
    def n_leaves(self) -> int:
@ -173,7 +203,7 @@ class Qube:
        if self.key == "root" and not self.children: return 0
        return 1 + sum(c.n_nodes for c in self.children)

-    def transform(self, func: 'Callable[[Qube], Qube | list[Qube]]') -> 'Qube':
+    def transform(self, func: 'Callable[[Qube], Qube | Iterable[Qube]]') -> 'Qube':
        """
        Call a function on every node of the Qube, return one or more nodes.
        If multiple nodes are returned they each get a copy of the (transformed) children of the original node.
@ -185,7 +215,7 @@ class Qube:
            if isinstance(new_nodes, Qube):
                new_nodes = [new_nodes]

-            return [dataclasses.replace(new_node, children = children)
+            return [new_node.replace(children = children)
                    for new_node in new_nodes]
        
        children = tuple(cc for c in self.children for cc in transform(c))
@ -243,87 +273,6 @@ class Qube:
            axes[self.key].update(self.values)
        return dict(axes)

-    @staticmethod
-    def _insert(position: "Qube", identifier : list[tuple[str, list[str]]]):
-        """
-        This algorithm goes as follows:
-        We're at a particular node in the Qube, and we have a list of key-values pairs that we want to insert.
-        We take the first key values pair
-        key, values = identifier.pop(0)
-
-        The general idea is to insert key, values into the current node and use recursion to handle the rest of the identifier.
-        
-        We have two sources of values with possible overlap. The values to insert and the values attached to the children of this node.
-        For each value coming from either source we put it in one of three categories:
-            1) Values that exist only in the already existing child. (Coming exclusively from position.children)
-            2) Values that exist in both a child and the new values.
-            3) Values that exist only in the new values.
-            
-
-        Thus we add the values to insert to a set, and loop over the children.
-        For each child we partition its values into the three categories.
-
-        For 1) we create a new child node with the key, reduced set of values and the same children.
-        For 2)
-            Create a new child node with the key, and the values in group 2
-            Recurse to compute the children
-
-        Once we have finished looping over children we know all the values left over came exclusively from the new values.
-        So we:
-            Create a new node with these values.
-            Recurse to compute the children
-
-        Finally we return the node with all these new children.
-        """
-        pass
-        # if not identifier:
-        #     return position
-
-        # key, values = identifier.pop(0)
-        # # print(f"Inserting {key}={values} into {position.summary()}")
-
-        # # Only the children with the matching key are relevant.
-        # source_children = {c : [] for c in position.children if c.key == key}
-        # new_children = []
-
-        # values = set(values)
-        # for c in source_children:
-        #     values_set = set(c.values)
-        #     group_1 = values_set - values
-        #     group_2 = values_set & values
-        #     values = values - values_set # At the end of this loop values will contain only the new values
-
-        #     if group_1:
-        #         group_1_node = Qube.make(c.key, QEnum((group_1)), c.children)
-        #         new_children.append(group_1_node) # Add the unaffected part of this child
-            
-        #     if group_2:
-        #         new_node = Qube.make(key, QEnum((affected)), [])
-        #         new_node = Qube._insert(new_node, identifier)
-        #         new_children.append(new_node) # Add the affected part of this child
-
-
-        #     unaffected = [x for x in c.values if x not in affected]
-
-
-        #     if affected: # This check is not technically necessary, but it makes the code more readable
-
-
-        # # If there are any values not in any of the existing children, add them as a new child
-        # if entirely_new_values:
-        #     new_node = Qube.make(key, QEnum((entirely_new_values)), [])
-        #     new_children.append(Qube._insert(new_node, identifier))
-
-        return Qube.make(position.key, position.values, new_children)
-
-    def insert(self, identifier : dict[str, list[str]]) -> 'Qube':
-        insertion = [(k, v) for k, v in identifier.items()]
-        return Qube._insert(self, insertion)
-
-    def info(self):
-        cubes = self.to_list_of_cubes()
-        print(f"Number of distinct paths: {len(cubes)}")
-
    @cached_property
    def structural_hash(self) -> int:
        """
@ -343,7 +292,4 @@ class Qube:
        new_children = set_operations.compress_children(new_children)

        # Return the now compressed node
-        return Qube(
-            data = self.data,
-            children = tuple(sorted(new_children))
-        )
+        return Qube.make(self.key, self.values, new_children)
--- a/src/python/qubed/tree_formatters.py
+++ b/src/python/qubed/tree_formatters.py
@ -16,15 +16,17 @@ class HTML():
    def _repr_html_(self):
        return self.html

-def summarize_node(node: TreeLike, collapse = False, **kwargs) -> tuple[str, TreeLike]:
+def summarize_node(node: TreeLike, collapse = False, **kwargs) -> tuple[str, str, TreeLike]:
    """
    Extracts a summarized representation of the node while collapsing single-child paths.
    Returns the summary string and the last node in the chain that has multiple children.
    """
    summaries = []
+    paths = []
    
    while True:
        summary = node.summary(**kwargs)
+        paths.append(summary)
        if len(summary) > 50:
            summary = summary[:50] + "..."
        summaries.append(summary)
@ -36,10 +38,10 @@ def summarize_node(node: TreeLike, collapse = False, **kwargs) -> tuple[str, Tre
            break
        node = node.children[0]

-    return ", ".join(summaries), node
+    return ", ".join(summaries), ",".join(paths), node

 def node_tree_to_string(node : TreeLike, prefix : str = "", depth = None) -> Iterable[str]:
-    summary, node = summarize_node(node)
+    summary, path, node = summarize_node(node)
    
    if depth is not None and depth <= 0:
        yield summary + " - ...\n"
@ -59,14 +61,14 @@ def node_tree_to_string(node : TreeLike, prefix : str = "", depth = None) -> Ite
        yield from node_tree_to_string(child, prefix + extension, depth = depth - 1 if depth is not None else None)

 def _node_tree_to_html(node : TreeLike, prefix : str = "", depth = 1, connector = "", **kwargs) -> Iterable[str]:
-    summary, node = summarize_node(node, **kwargs)
+    summary, path, node = summarize_node(node, **kwargs)
    
    if len(node.children) == 0:
-        yield f'<span class="leaf">{connector}{summary}</span>'
+        yield f'<span class="qubed-node leaf" data-path="{path}">{connector}{summary}</span>'
        return
    else:
        open = "open" if depth > 0 else ""
-        yield f"<details {open}><summary>{connector}{summary}</summary>"
+        yield f'<details {open} data-path="{path}"><summary class="qubed-node">{connector}{summary}</summary>'

    for index, child in enumerate(node.children):
        connector = "└── " if index == len(node.children) - 1 else "├── "
@ -76,10 +78,12 @@ def _node_tree_to_html(node : TreeLike, prefix : str = "", depth = 1, connector

 def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
        css_id = f"qubed-tree-{random.randint(0, 1000000)}"
-        css = f"""
+        
+        # It's ugly to use an f-string here because CSS uses {} so much, so instead
+        # we use CSS_ID as a placeholder and replace it later
+        css = """
        <style>
-        pre#{css_id} """ \
-        """{
+        pre#CSS_ID {
            font-family: monospace;
            white-space: pre;
            font-family: SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;
@ -90,6 +94,11 @@ def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
                margin-left: 0;
            }

+            .qubed-node a {
+                margin-left: 10px;
+                text-decoration: none;
+            }
+
            summary {
                list-style: none;
                cursor: pointer;
@ -125,6 +134,30 @@ def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:

        }
        </style>
-        """
+        """.replace("CSS_ID", css_id)
+
+        # This JS snippet copies the path of a node to the clipboard when it is alt-clicked
+        js = """
+        <script type="module" defer>
+        async function nodeOnClick(event) {
+            if (!event.altKey) return;
+            event.preventDefault();
+            let current_element = this.parentElement;
+            let paths = [];
+            while (true) {
+                if (current_element.dataset.path) {
+                    paths.push(current_element.dataset.path);
+                }
+                current_element = current_element.parentElement;
+                if (current_element.tagName == "PRE") break;
+            }
+            const path = paths.reverse().slice(1).join(",");
+            await navigator.clipboard.writeText(path);
+        }
+
+        const nodes = document.querySelectorAll("#CSS_ID .qubed-node");
+        nodes.forEach(n => n.addEventListener("click", nodeOnClick));
+        </script>
+        """.replace("CSS_ID", css_id)
        nodes = "".join(_node_tree_to_html(node=node, depth=depth, **kwargs))
-        return f"{css}<pre class='qubed-tree' id='{css_id}'>{nodes}</pre>"
+        return f"{js}{css}<pre class='qubed-tree' id='{css_id}'>{nodes}</pre>"
--- a/src/python/qubed/value_types.py
+++ b/src/python/qubed/value_types.py
@ -1,9 +1,11 @@
 import dataclasses
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
+from dataclasses import dataclass, replace
 from datetime import date, datetime, timedelta
-from typing import Any, FrozenSet, Iterable, Literal, TypeVar
+from typing import TYPE_CHECKING, Any, FrozenSet, Iterable, Literal, TypeVar

+if TYPE_CHECKING:
+    from .Qube import Qube

@dataclass(frozen=True)
 class Values(ABC):
@ -67,11 +69,32 @@ class QEnum(Values):
        return min(self.values)
    def to_json(self):
        return list(self.values)
+    
+class DateEnum(QEnum):  
+    def summary(self) -> str:
+        def fmt(d): return d.strftime("%Y%m%d")
+        return '/'.join(map(fmt, sorted(self.values)))

@dataclass(frozen=True)
 class Range(Values, ABC):
    dtype: str = dataclasses.field(kw_only=True)

+    start: Any
+    end: Any
+    step: Any
+
+    def min(self):
+        return self.start
+    
+    def __iter__(self) -> Iterable[Any]:
+        i = self.start
+        while i <= self.end:
+            yield i
+            i += self.step
+
+    def to_json(self):
+        return dataclasses.asdict(self)
+
@dataclass(frozen=True)
 class DateRange(Range):
    start: date
@ -89,36 +112,58 @@ class DateRange(Range):
            current += self.step

    @classmethod
-    def from_strings(self, values: Iterable[str]) -> list['DateRange']:
+    def from_strings(cls, values: Iterable[str]) -> "list[DateRange | QEnum]":
        dates = sorted([datetime.strptime(v, "%Y%m%d") for v in values])
        if len(dates) < 2:
-            return [DateRange(
-                start=dates[0],
-                end=dates[0],
-                step=timedelta(days=0)
-            )]
+            return [DateEnum(dates)]
        
        ranges = []
-        current_range, dates = [dates[0],], dates[1:]
+        current_group, dates = [dates[0],], dates[1:]
+        current_type : Literal["enum", "range"] = "enum"
        while len(dates) > 1:
-            if dates[0] - current_range[-1] == timedelta(days=1):
-                current_range.append(dates.pop(0))
-            
-            elif len(current_range) == 1:
-                ranges.append(DateRange(
-                start=current_range[0],
-                end=current_range[0],
-                step=timedelta(days=0)
-                ))
-                current_range = [dates.pop(0),]
+            if current_type == "range":

-            else:
+                # If the next date fits then add it to the current range
+                if dates[0] - current_group[-1] == timedelta(days=1):
+                    current_group.append(dates.pop(0))
+
+
+                # Emit the current range and start a new one
+                else:
+                    if len(current_group) == 1:
+                        ranges.append(DateEnum(current_group))
+                    else:
+                        ranges.append(DateRange(
+                            start=current_group[0],
+                            end=current_group[-1],
+                            step=timedelta(days=1)
+                        ))
+                    current_group = [dates.pop(0),]
+                    current_type = "enum"
+            
+            if current_type == "enum":
+                # If the next date is one more than the last then switch to range mode
+                if dates[0] - current_group[-1] == timedelta(days=1):
+                    last = current_group.pop()
+                    if current_group:
+                        ranges.append(DateEnum(current_group))
+                    current_group = [last, dates.pop(0)]
+                    current_type = "range"
+
+                else:
+                    current_group.append(dates.pop(0))
+
+        # Handle remaining `current_group`
+        if current_group:
+            if current_type == "range":
                ranges.append(DateRange(
-                start=current_range[0],
-                end=current_range[-1],
-                step=timedelta(days=1)
+                    start=current_group[0],
+                    end=current_group[-1],
+                    step=timedelta(days=1)
                ))
-                current_range = [dates.pop(0),]
+            else:
+                ranges.append(DateEnum(current_group))
+
        return ranges
    
    def __contains__(self, value: Any) -> bool:
@ -141,6 +186,11 @@ class TimeRange(Range):
    step: int
    dtype: Literal["time"] = dataclasses.field(kw_only=True, default="time")

+    def min(self):
+        return self.start
+    def __iter__(self) -> Iterable[Any]:
+        return super().__iter__()
+
    @classmethod
    def from_strings(self, values: Iterable[str]) -> list['TimeRange']:
        times = sorted([int(v) for v in values])
@ -198,7 +248,9 @@ class IntRange(Range):
        return (self.end - self.start) // self.step
    
    def summary(self) -> str:
-        def fmt(d): return d.strftime("%Y%m%d")
+        def fmt(d): return d
+        if self.step == 0:
+            return f"{fmt(self.start)}"
        return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
    
    def __contains__(self, value: Any) -> bool:
@ -247,3 +299,17 @@ def values_from_json(obj) -> Values:
        case "time": return TimeRange(**obj)
        case "int": return IntRange(**obj)
        case _: raise ValueError(f"Unknown dtype {obj['dtype']}")
+
+
+def convert_datatypes(q: "Qube", conversions: dict[str, Values]) -> "Qube":
+    def _convert(q: "Qube") -> Iterable["Qube"]:
+        if q.key in conversions:
+            data_type = conversions[q.key]
+            assert isinstance(q.values, QEnum), "Only QEnum values can be converted to other datatypes."
+            for values_group in data_type.from_strings(q.values):
+                # print(values_group)
+                yield replace(q, data=replace(q.data, values=values_group))
+        else:
+            yield q
+
+    return q.transform(_convert)
--- a/tests/test_basic_operations.py
+++ b/tests/test_basic_operations.py
@ -1,22 +1,31 @@
 from qubed import Qube

+d = {
+    "class=od" : {
+        "expver=0001": {"param=1":{}, "param=2":{}},
+        "expver=0002": {"param=1":{}, "param=2":{}},
+    },
+    "class=rd" : {
+        "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
+        "expver=0002": {"param=1":{}, "param=2":{}},
+    },
+}
+q = Qube.from_dict(d)

 def test_eq():
-    d = {
-        "class=od" : {
-            "expver=0001": {"param=1":{}, "param=2":{}},
-            "expver=0002": {"param=1":{}, "param=2":{}},
-        },
-        "class=rd" : {
-            "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
-            "expver=0002": {"param=1":{}, "param=2":{}},
-        },
-    }
-    q = Qube.from_dict(d)
    r = Qube.from_dict(d)
-
    assert q == r

+def test_getitem():
+    assert q["class", "od"] == Qube.from_dict({
+        "expver=0001": {"param=1":{}, "param=2":{}},
+        "expver=0002": {"param=1":{}, "param=2":{}},
+    })
+    assert q["class", "od"]["expver", "0001"] == Qube.from_dict({
+        "param=1":{}, "param=2":{},
+    })
+
+
 def test_n_leaves():
    q = Qube.from_dict({
        "a=1/2/3" : {"b=1/2/3" : {"c=1/2/3" : {}}},
--- a/tests/test_formatters.py
+++ b/tests/test_formatters.py
@ -0,0 +1,31 @@
+from qubed import Qube
+
+d = {
+    "class=od" : {
+        "expver=0001": {"param=1":{}, "param=2":{}},
+        "expver=0002": {"param=1":{}, "param=2":{}},
+    },
+    "class=rd" : {
+        "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
+        "expver=0002": {"param=1":{}, "param=2":{}},
+    },
+}
+q = Qube.from_dict(d).compress()
+
+as_string= """
+root
+├── class=od, expver=0001/0002, param=1/2
+└── class=rd
+    ├── expver=0001, param=1/2/3
+    └── expver=0002, param=1/2
+""".strip()
+
+as_html = """
+<details open data-path="root"><summary class="qubed-node">root</summary><span class="qubed-node leaf" data-path="class=od,expver=0001/0002,param=1/2">├── class=od, expver=0001/0002, param=1/2</span><details open data-path="class=rd"><summary class="qubed-node">└── class=rd</summary><span class="qubed-node leaf" data-path="expver=0001,param=1/2/3">    ├── expver=0001, param=1/2/3</span><span class="qubed-node leaf" data-path="expver=0002,param=1/2">    └── expver=0002, param=1/2</span></details></details>
+""".strip()
+
+def test_string():
+    assert str(q).strip() == as_string
+
+def test_html():
+    assert as_html in q._repr_html_()