updates

2025-02-10 15:26:25 +00:00 · 2025-02-10 15:26:25 +00:00 · 609e3e9f74
commit 609e3e9f74
parent 81a478a58f
9 changed files with 888 additions and 13 deletions
--- a/tree_compresser/Cargo.lock
+++ b/tree_compresser/Cargo.lock
@ -1,6 +1,6 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 3
+version = 4
 [[package]]
 name = "aho-corasick"
@ -359,7 +359,6 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
 [[package]]
 name = "rsfdb"
 version = "0.1.0"
 source = "git+https://github.com/ecmwf/rsfdb?branch=develop#ab8c9590bba15d22167c274db9238cd9b897baf1"
 dependencies = [
 "libc",
 "libloading",
@ -372,7 +371,6 @@ dependencies = [
 [[package]]
 name = "rsfindlibs"
 version = "0.1.1"
 source = "git+https://github.com/ecmwf-projects/rsfindlibs.git#1358b1049bf3e0b581badfc8005a9828a542cdaa"
 dependencies = [
 "cc",
 "clap",
--- a/tree_compresser/Cargo.toml
+++ b/tree_compresser/Cargo.toml
@ -1,7 +1,8 @@
 [package]
-name = "qubed_tree"
+name = "qubed"
-version = "0.1.0"
+version = "0.1.2"
 edition = "2021"
 repository = "https://github.com/ecmwf/qubed"
 [dependencies]
 rsfdb = {git = "https://github.com/ecmwf/rsfdb", branch = "develop"}
@ -16,7 +17,7 @@ crate-type = ["cdylib"]
 path = "./rust_src/lib.rs"
 [patch.'https://github.com/ecmwf/rsfdb']
-rsfdb = { path = "../rsfdb" }
+rsfdb = { path = "../../rsfdb" }
 [patch.'https://github.com/ecmwf-projects/rsfindlibs']
-rsfindlibs = { path = "../rsfindlibs" }
+rsfindlibs = { path = "../../rsfindlibs" }
--- a/tree_compresser/python_src/tree_traverser/CompressedDataCubeTree.py
+++ b/tree_compresser/python_src/tree_traverser/CompressedDataCubeTree.py
@ -0,0 +1,216 @@
 import dataclasses
 from collections import defaultdict
 from dataclasses import dataclass, field
 from frozendict import frozendict
 from .DataCubeTree import Enum, NodeData, Tree
 from .tree_formatters import HTML, node_tree_to_html, node_tree_to_string
 # Identifier under which a node is stored in the cache.
 NodeId = int
 # Cache mapping each node id to the CompressedNode stored under it.
 CacheType = dict[NodeId, "CompressedNode"]
@dataclass(frozen=True)
class CompressedNode:
    """
    A node of a compressed tree that refers to its children by id.

    ``id`` and ``_cache`` are excluded from equality and hashing, so two nodes
    compare equal when their ``data`` and ``_children`` ids are equal.
    """
    id: NodeId = field(hash=False, compare=False)
    data: NodeData
    _children: tuple[NodeId, ...]
    _cache: CacheType = field(repr=False, hash=False, compare=False)

    @property
    def children(self) -> tuple["CompressedNode", ...]:
        """Resolve the stored child ids into nodes by looking them up in the cache."""
        lookup = self._cache
        return tuple(lookup[child_id] for child_id in self._children)

    def summary(self, debug = False) -> str:
        """
        Return ``key=values`` for this node, or ``root`` for the root node.

        With ``debug`` set the node id is appended and the root is not special-cased.
        """
        if debug:
            return f"{self.data.key}={self.data.values.summary()} ({self.id})"
        if self.data.key == "root":
            return "root"
        return f"{self.data.key}={self.data.values.summary()}"
@dataclass(frozen=True)
class CompressedTree:
    """
    A tree whose nodes live in a shared cache and refer to their children by id.

    This tree is compressed in two distinct ways:

    1. Product compression: nodes have a key and **multiple values**, so each
       node represents many logical nodes key=value1, key=value2, ...
       Each of these logical nodes has identical children, so we can compress
       them like this. In this way any distinct path through the tree
       represents a cartesian product of the values, otherwise known as a
       datacube.

    2. In order to facilitate the product compression described above we need
       to know when two nodes have identical children. To do this every node
       is assigned an id which is initially computed as a hash of the node's
       data and its children's ids. To avoid hash collisions the initial hash
       is incremented while it is already used in the cache by a different
       node, until a free id is found. Crucially this allows us to later
       determine whether a new node is already cached:

           id = hash(node)
           while True:
               if id not in cache: the node is definitely not in the cache
               elif cache[id] != node: hash collision, increment id and try again
               else: the node is already in the cache
               id += 1

    The tree can be walked from the root by repeatedly looking up the children
    of a node in the cache. This structure facilitates compression because we
    can look at the children of a node: if two children have the same key,
    metadata and children then we can compress them into a single node.
    """
    root: CompressedNode
    cache: CacheType

    @staticmethod
    def add_to_cache(cache : dict[NodeId, CompressedNode], data : NodeData, _children: tuple[NodeId, ...]) -> NodeId:
        """
        Add a node to the cache (if it is not already there) and return its id.

        The id starts as a hash of the node's data and its sorted child ids.
        If that id is taken by a *different* node it is incremented until
        either a free slot or the identical node is found, so the same node is
        never stored twice under different ids.

        Raises:
            RuntimeError: if more than 100 consecutive collisions occur.
        """
        _children = tuple(sorted(_children))
        node_id = hash((data, _children))

        # To avoid hash collisions, we increment the id until we find a unique one
        tries = 0
        while True:
            tries += 1
            if node_id not in cache:
                # The node isn't in the cache and this id is free
                cache[node_id] = CompressedNode(id = node_id,
                                                data = data,
                                                _children = _children,
                                                _cache = cache)
                break

            if cache[node_id].data == data and cache[node_id]._children == _children:
                break # The node is already in the cache

            # This id is already in use by a different node so increment it (mod) and try again
            node_id = (node_id + 1) % (2**64)

            if tries > 100:
                raise RuntimeError("Too many hash collisions, something is wrong.")

        return node_id

    @classmethod
    def from_tree(cls, tree : Tree) -> 'CompressedTree':
        """Build a compressed tree by caching every node of ``tree`` bottom-up."""
        cache = {}

        def cache_tree(level : Tree) -> NodeId:
            node_data = NodeData(
                key = level.key,
                values = level.values,
            )
            # Recursively cache the children
            children = tuple(cache_tree(c) for c in level.children)
            # Add the node to the cache and return its id
            return cls.add_to_cache(cache, node_data, children)

        root = cache_tree(tree)
        return cls(cache = cache, root = cache[root])

    def __str__(self):
        return "".join(node_tree_to_string(self.root))

    def html(self, depth = 2, debug = False) -> HTML:
        """Return an ``HTML`` wrapper around the rendered tree."""
        return HTML(node_tree_to_html(self.root, depth = depth, debug = debug))

    def _repr_html_(self) -> str:
        return node_tree_to_html(self.root, depth = 2)

    def __getitem__(self, args) -> 'CompressedTree':
        """
        Return the subtree below the root child matching ``(key, value)``.

        The returned tree shares this tree's cache; its root is a copy of the
        matching child restricted to the single requested value.

        Raises:
            KeyError: if no child of the root has this key and value.
        """
        key, value = args
        for c in self.root.children:
            if c.data.key == key and value in c.data.values:
                data = dataclasses.replace(c.data, values = Enum((value,)))
                return CompressedTree(
                    cache = self.cache,
                    root = dataclasses.replace(c, data = data)
                )
        raise KeyError(f"Key {key} not found in children.")

    def collapse_children(self, node: "CompressedNode") -> "CompressedNode":
        """
        Recursively merge children that differ only in their values.

        Children with identical key, metadata and child ids are replaced by a
        single node holding the combined values. New nodes are added to the cache.
        """
        # First perform the collapse on the children
        collapsed = [self.collapse_children(child) for child in node.children]

        # Group the collapsed children by (key, metadata, children);
        # the values may differ and will be merged into a single node
        identical_children = defaultdict(set)
        for child in collapsed:
            identical_children[(child.data.key, child.data.metadata, child._children)].add(child)

        # Now go through and create new compressed nodes for any groups that need collapsing
        new_children = []
        for (key, metadata, _children), child_set in identical_children.items():
            if len(child_set) > 1:
                # Compress the children into a single node
                assert all(isinstance(child.data.values, Enum) for child in child_set), "All children must have Enum values"
                node_data = NodeData(
                    key = key,
                    metadata = frozendict(), # Todo: Implement metadata compression
                    values = Enum(tuple(v for child in child_set for v in child.data.values.values)),
                )
                # Add the node to the cache
                child_id = type(self).add_to_cache(self.cache, node_data, _children)
            else:
                # If the group is size one just keep it
                child_id = child_set.pop().id

            new_children.append(child_id)

        node_id = self.add_to_cache(self.cache, node.data, tuple(sorted(new_children)))
        return self.cache[node_id]

    def compress(self) -> 'CompressedTree':
        """Return a new tree (sharing this cache) with collapsible children merged."""
        return CompressedTree(cache = self.cache, root = self.collapse_children(self.root))

    def lookup(self, selection : dict[str, str]):
        """
        Walk down from the root following the branch that matches ``selection``.

        At each level the single child whose key is in the selection and whose
        values contain the selected value is followed; each key is consumed
        once it has been matched. The caller's ``selection`` is not modified.

        Returns:
            The list of nodes visited, starting with the root.

        Raises:
            RuntimeError: if more than one child matches at some level, or if
                the walk exceeds 1000 levels.
        """
        # Work on a copy so the caller's dict is left untouched
        remaining = dict(selection)
        nodes = [self.root]
        for _ in range(1000):
            current_node = nodes[-1]
            matched = None
            for c in current_node.children:
                key = c.data.key
                # Only test membership for keys that were actually selected
                if key in remaining and remaining[key] in c.data.values:
                    if matched is not None:
                        raise RuntimeError("This tree is invalid, because it contains overlapping branches.")
                    matched = c

            if matched is None:
                return nodes

            # Consume the key only after all siblings were checked, otherwise
            # overlapping siblings with the same key would go undetected
            nodes.append(matched)
            remaining.pop(matched.data.key)

        raise RuntimeError("Maximum node searches exceeded, the tree contains a loop or something is buggy.")
    # def reconstruct(self) -> Tree:
    #     def reconstruct_node(h : int) -> Tree:
    #         node = self.cache[h]
    #         dedup : dict[tuple[int, str], set[NodeId]] = defaultdict(set)
    #         for index in self.cache[h].children:
    #             child_node = self.cache[index]
    #             child_hash = hash(child_node.children)
    #             assert isinstance(child_node.values, Enum)
    #             dedup[(child_hash, child_node.key)].add(index)
    #         children = tuple(
    #             Tree(key = key, values = Enum(tuple(values)), 
    #             children = tuple(reconstruct_node(i) for i in self.cache[next(indices)].children)
    #             )
    #             for (_, key), indices in dedup.items()
    #         )
    #         return Tree(
    #             key = node.key,
    #             values = node.values,
    #             children = children,
    #         )
    #     return reconstruct_node(self.root)
--- a/tree_compresser/python_src/tree_traverser/CompressedTree.py
+++ b/tree_compresser/python_src/tree_traverser/CompressedTree.py
@ -1,5 +1,6 @@
 import json
 from collections import defaultdict
 from dataclasses import asdict, dataclass
 from pathlib import Path
 Tree = dict[str, "Tree"]
@ -13,6 +14,11 @@ class RefcountedDict(dict[str, int]):
    def __hash__(self):
        # Hash the items in sorted order so the result is independent of insertion order.
        ordered_items = sorted(self.items())
        return hash(tuple(ordered_items))
@dataclass
class JSONNode:
    """A JSON-serialisable tree node: a key, its list of values and its child nodes."""
    key: str
    values: list[str]
    children: list["JSONNode"]
 class CompressedTree():
    """
@ -101,6 +107,23 @@ class CompressedTree():
            return {f"{key}={','.join(values)}" : reconstruct_node(h, depth=depth+1) for (h, key), values in dedup.items()}
        return reconstruct_node(from_node or self.root_hash, depth=0)
    def to_json(self, max_depth=None, from_node=None) -> dict:
        """
        Convert the subtree below ``from_node`` (default: the root) to nested dicts.

        Entries of a cache node that share the same key and child hash are
        merged into one ``JSONNode`` carrying all their values. Only the first
        node found under the starting node is returned, converted with ``asdict``.

        Args:
            max_depth: if given, nodes deeper than this get an empty children list.
            from_node: hash of the cache entry to start from.
        """
        def reconstruct_node(h : int, depth : int) -> list[JSONNode]:
            if max_depth is not None and depth > max_depth:
                # Always return a list so "children" has one consistent JSON type
                return []

            dedup : dict[tuple[int, str], set[str]] = defaultdict(set)
            for k, h2 in self.cache[h].items():
                # Split on the first "=" only, values may themselves contain "="
                key, value = k.split("=", 1)
                dedup[(h2, key)].add(value)

            return [JSONNode(
                key = key,
                values = list(values),
                children = reconstruct_node(child_hash, depth=depth+1),
            ) for (child_hash, key), values in dedup.items()]

        return asdict(reconstruct_node(from_node or self.root_hash, depth=0)[0])
    def __init__(self, tree : Tree):
        self.cache = {}
        self.empty_hash = hash(RefcountedDict({}))
@ -139,8 +162,8 @@ class CompressedTree():
        return list(loc.keys())
    def multi_match(self, request : dict[str, list[str]], loc = None):
        if not loc: return {"_END_" : {}}
        if loc is None: loc = self.tree
        if loc == {}: return {"_END_" : {}}
        matches = {}
        for request_key, request_values in request.items():
            for request_value in request_values:
--- a/tree_compresser/python_src/tree_traverser/DataCubeTree.py
+++ b/tree_compresser/python_src/tree_traverser/DataCubeTree.py
@ -0,0 +1,267 @@
 import dataclasses
 from dataclasses import dataclass, field
 from typing import Any, Callable, Hashable, Literal, Mapping
 from frozendict import frozendict
 from .tree_formatters import HTML, node_tree_to_html, node_tree_to_string
 from .value_types import DateRange, Enum, IntRange, TimeRange, Values
 def values_from_json(obj) -> Values:
    """
    Build a ``Values`` object from its JSON form.

    A list becomes an ``Enum``; otherwise ``obj["dtype"]`` selects the range
    class, which is constructed with ``obj`` as keyword arguments.

    Raises:
        ValueError: if the dtype is not "date", "time" or "int".
    """
    if isinstance(obj, list):
        return Enum(tuple(obj))

    dtype = obj["dtype"]
    if dtype == "date":
        return DateRange(**obj)
    if dtype == "time":
        return TimeRange(**obj)
    if dtype == "int":
        return IntRange(**obj)
    raise ValueError(f"Unknown dtype {obj['dtype']}")
 # Type alias for a metadata mapping from string keys to scalar values.
 # In practice use a frozendict
 Metadata = Mapping[str, str | int | float | bool]
@dataclass(frozen=True, eq=True, order=True)
class NodeData:
    """
    The payload of a tree node: a key, its values and optional metadata.

    ``metadata`` is excluded from comparisons (``compare=False``).
    """
    key: str
    values: Values
    metadata: dict[str, tuple[Hashable, ...]] = field(default_factory=frozendict, compare=False)

    def summary(self) -> str:
        """Return ``key=values`` for this node, or ``root`` when the key is "root"."""
        if self.key == "root":
            return "root"
        return f"{self.key}={self.values.summary()}"
@dataclass(frozen=True, eq=True, order=True)
 class Tree:
    data: NodeData
    children: tuple['Tree', ...]
    @property
    def key(self) -> str:
        return self.data.key
    @property
    def values(self) -> Values:
        return self.data.values
    @property
    def metadata(self) -> frozendict[str, Any]:
        return self.data.metadata
    def summary(self) -> str:
        return self.data.summary()
    @classmethod
    def make(cls, key : str, values : Values, children, **kwargs) -> 'Tree':
        return cls(
            data = NodeData(key, values,  metadata = kwargs.get("metadata", frozendict())
            ),
            children = tuple(sorted(children)),
        )
    @classmethod
    def from_json(cls, json: dict) -> 'Tree':
        def from_json(json: dict) -> Tree:
            return Tree.make(
                key=json["key"],
                values=values_from_json(json["values"]),
                metadata=json["metadata"] if "metadata" in json else {},
                children=tuple(from_json(c) for c in json["children"])
            )
        return from_json(json)
    @classmethod
    def from_dict(cls, d: dict) -> 'Tree':
        def from_dict(d: dict) -> tuple[Tree, ...]:
            return tuple(Tree.make(
                key=k.split("=")[0],
                values=Enum(tuple(k.split("=")[1].split("/"))),
                children=from_dict(children)
            ) for k, children in d.items())
        return Tree.make(key = "root",
                              values=Enum(("root",)),
                              children = from_dict(d))
    @classmethod
    def empty(cls) -> 'Tree':
        return cls.make("root", Enum(("root",)), [])
    def __str__(self):
        return "".join(node_tree_to_string(node=self))
    def html(self, depth = 2, collapse = True) -> HTML:
        return HTML(node_tree_to_html(self, depth = depth, collapse = collapse))
    def _repr_html_(self) -> str:
        return node_tree_to_html(self, depth = 2, collapse = True)
    def __getitem__(self, args) -> 'Tree':
        key, value = args
        for c in self.children:
            if c.key == key and value in c.values:
                data = dataclasses.replace(c.data, values = Enum((value,)))
                return dataclasses.replace(c, data = data)
        raise KeyError(f"Key {key} not found in children of {self.key}")
    def print(self, depth = None):
        print("".join(cc for c in self.children for cc in node_tree_to_string(node=c, depth = depth)))
    def transform(self, func: 'Callable[[Tree], Tree | list[Tree]]') -> 'Tree':
        """
        Call a function on every node of the tree, return one or more nodes.
        If multiple nodes are returned they each get a copy of the (transformed) children of the original node.
        Any changes to the children of a node will be ignored.
        """
        def transform(node: Tree) -> list[Tree]:
            children = [cc for c in node.children for cc in transform(c)]
            new_nodes = func(node)
            if isinstance(new_nodes, Tree):
                new_nodes = [new_nodes]
            return [dataclasses.replace(new_node, children = children)
                    for new_node in new_nodes]
        children = tuple(cc for c in self.children for cc in transform(c))
        return dataclasses.replace(self, children = children)
    def guess_datatypes(self) -> 'Tree':
        def guess_datatypes(node: Tree) -> list[Tree]:
            # Try to convert enum values into more structured types
            children = tuple(cc for c in node.children for cc in guess_datatypes(c))
            if isinstance(node.values, Enum):
                match node.key:
                    case "time": range_class = TimeRange
                    case "date": range_class = DateRange
                    case _: range_class = None
                if range_class is not None:
                    return [
                        dataclasses.replace(node, values = range, children = children)
                        for range in range_class.from_strings(node.values.values)
                    ]
            return [dataclasses.replace(node, children = children)]
        children = tuple(cc for c in self.children for cc in guess_datatypes(c))
        return dataclasses.replace(self, children = children)
    def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed") -> 'Tree':
        # make all values lists
        selection = {k : v if isinstance(v, list) else [v] for k,v in selection.items()}
        def not_none(xs): return tuple(x for x in xs if x is not None)
        def select(node: Tree) -> Tree | None: 
            # Check if the key is specified in the selection
            if node.key not in selection: 
                if mode == "strict":
                    return None
                return dataclasses.replace(node, children = not_none(select(c) for c in node.children))
            # If the key is specified, check if any of the values match
            values = Enum(tuple(c for c in selection[node.key] if c in node.values))
            if not values: 
                return None 
            return dataclasses.replace(node, values = values, children = not_none(select(c) for c in node.children))
        return dataclasses.replace(self, children = not_none(select(c) for c in self.children))
    @staticmethod
    def _insert(position: "Tree", identifier : list[tuple[str, list[str]]]):
        """
        This algorithm goes as follows:
        We're at a particular node in the tree, and we have a list of key-values pairs that we want to insert.
        We take the first key values pair
        key, values = identifier.pop(0)
        The general idea is to insert key, values into the current node and use recursion to handle the rest of the identifier.
        We have two sources of values with possible overlap. The values to insert and the values attached to the children of this node.
        For each value coming from either source we put it in one of three categories:
            1) Values that exist only in the already existing child. (Coming exclusively from position.children)
            2) Values that exist in both a child and the new values.
            3) Values that exist only in the new values.
        Thus we add the values to insert to a set, and loop over the children.
        For each child we partition its values into the three categories.
        For 1) we create a new child node with the key, reduced set of values and the same children.
        For 2)
            Create a new child node with the key, and the values in group 2
            Recurse to compute the children
        Once we have finished looping over children we know all the values left over came exclusively from the new values.
        So we:
            Create a new node with these values.
            Recurse to compute the children
        Finally we return the node with all these new children.
        """
        if not identifier:
            return position
        key, values = identifier.pop(0)
        # print(f"Inserting {key}={values} into {position.summary()}")
        # Determine which children have this key
        possible_children = {c : [] for c in position.children if c.key == key}
        entirely_new_values = []
        # For each value check it is already in one of the children
        for v in values:
            for c in possible_children:
                if v in c.values:
                    possible_children[c].append(v)
                    break
            else: # only executed if the loop did not break
                # If none of the children have this value, add it to the new child pile
                entirely_new_values.append(v)
        # d = {p.summary() : v for p, v in possible_children.items()}
        # print(f"  {d} new_values={entirely_new_values}")
        new_children = []
        for c, affected in possible_children.items():
            if not affected:
                new_children.append(c)
                continue
            unaffected = [x for x in c.values if x not in affected]
            if unaffected:
                unaffected_node = Tree.make(c.key, Enum(tuple(unaffected)), c.children)
                new_children.append(unaffected_node) # Add the unaffected part of this child
            if affected: # This check is not technically necessary, but it makes the code more readable
                new_node = Tree.make(key, Enum(tuple(affected)), [])
                new_node = Tree._insert(new_node, identifier)
                new_children.append(new_node) # Add the affected part of this child
        # If there are any values not in any of the existing children, add them as a new child
        if entirely_new_values:
            new_node = Tree.make(key, Enum(tuple(entirely_new_values)), [])
            new_children.append(Tree._insert(new_node, identifier))
        return Tree.make(position.key, position.values, new_children)
    def insert(self, identifier : dict[str, list[str]]) -> 'Tree':
        insertion = [(k, v) for k, v in identifier.items()]
        return Tree._insert(self, insertion)
    def to_list_of_cubes(self):
        def to_list_of_cubes(node: Tree) -> list[list[Tree]]:
            return [[node] + sub_cube for c in node.children for sub_cube in to_list_of_cubes(c)]
        return to_list_of_cubes(self)
    def info(self):
        cubes = self.to_list_of_cubes()
        print(f"Number of distinct paths: {len(cubes)}")
--- a/tree_compresser/python_src/tree_traverser/tree_formatters.py
+++ b/tree_compresser/python_src/tree_traverser/tree_formatters.py
@ -0,0 +1,116 @@
 from dataclasses import dataclass
 from typing import Iterable, Protocol, Sequence, runtime_checkable
@runtime_checkable
class TreeLike(Protocol):
    """Structural type for nodes the formatters can render."""

    @property
    def children(self) -> Sequence["TreeLike"]:
        """Child nodes; supports indexing like ``node.children[i]``."""
        ...

    def summary(self, **kwargs) -> str:
        """Return a one-line description of the node."""
        ...
@dataclass(frozen=True)
class HTML():
    """Immutable wrapper around an HTML string, exposed through ``_repr_html_``."""
    html: str

    def _repr_html_(self):
        """Return the wrapped HTML string unchanged."""
        markup = self.html
        return markup
 def summarize_node(node: TreeLike, collapse = False, **kwargs) -> tuple[str, TreeLike]:
    """
    Build a summary string for a node, optionally collapsing single-child chains.

    Each summary is truncated to 50 characters followed by "...". With
    ``collapse`` set, the walk continues downwards while the current node has
    exactly one child and the summaries are joined with ", ".

    Returns the summary string and the last node that was summarized.
    """
    parts = []
    current = node
    descend = True
    while descend:
        text = current.summary(**kwargs)
        parts.append(text if len(text) <= 50 else text[:50] + "...")

        # Move down only when collapsing and there's exactly one child
        descend = collapse and len(current.children) == 1
        if descend:
            current = current.children[0]

    return ", ".join(parts), current
 def node_tree_to_string(node : TreeLike, prefix : str = "", depth = None) -> Iterable[str]:
    """
    Yield the pieces of a text rendering of the tree rooted at ``node``.

    ``depth`` limits how many levels are expanded; nodes at the limit are
    rendered as ``summary - ...``. A node with a single child is printed on
    the same line as that child, separated by ", ".
    """
    summary, node = summarize_node(node)

    if depth is not None and depth <= 0:
        yield summary + " - ...\n"
        return

    children = node.children

    # Special case for nodes with only a single child, this makes the printed representation more compact
    if len(children) == 1:
        yield summary + ", "
        yield from node_tree_to_string(children[0], prefix, depth = depth)
        return

    yield summary + "\n"

    child_depth = None if depth is None else depth - 1
    last_index = len(children) - 1
    for index, child in enumerate(children):
        is_last = index == last_index
        yield prefix + ("└── " if is_last else "├── ")
        child_prefix = prefix + ("    " if is_last else "│   ")
        yield from node_tree_to_string(child, child_prefix, depth = child_depth)
 def _node_tree_to_html(node : TreeLike, prefix : str = "", depth = 1, connector = "", **kwargs) -> Iterable[str]:
    """
    Yield the HTML fragments for the tree rooted at ``node``.

    Leaves become ``<span class="leaf">`` elements and inner nodes become
    ``<details>`` elements, which are rendered open while ``depth`` > 0.
    Extra keyword arguments are passed on to ``summarize_node``.
    """
    from html import escape

    summary, node = summarize_node(node, **kwargs)
    # The summary is built from keys and values of the data, so escape it
    # before placing it inside markup.
    summary = escape(summary)

    if len(node.children) == 0:
        yield f'<span class="leaf">{connector}{summary}</span>'
        return
    else:
        open_attr = "open" if depth > 0 else ""
        yield f"<details {open_attr}><summary>{connector}{summary}</summary>"

    for index, child in enumerate(node.children):
        connector = "└── " if index == len(node.children) - 1 else "├── "
        extension = "    " if index == len(node.children) - 1 else "│   "
        yield from _node_tree_to_html(child, prefix + extension, depth = depth - 1, connector = prefix+connector, **kwargs)
    yield "</details>"
 def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
    """
    Render the tree rooted at ``node`` as an HTML string.

    The result is a ``<style>`` block followed by a
    ``<pre class='qubed-tree-view'>`` element containing the node markup.
    ``depth`` and any extra keyword arguments are passed to ``_node_tree_to_html``.
    """
    # The stylesheet text is emitted verbatim, including its leading whitespace.
    stylesheet = """
        <style>
        .qubed-tree-view {
            font-family: monospace;
            white-space: pre;
        }
        .qubed-tree-view details {
            # display: inline;
            margin-left: 0;
        }
        .qubed-tree-view summary {
            list-style: none;
            cursor: pointer;
            text-overflow: ellipsis;
            overflow: hidden;
            text-wrap: nowrap;
            display: block;
        }
        .qubed-tree-view .leaf {
            text-overflow: ellipsis;
            overflow: hidden;
            text-wrap: nowrap;
            display: block;
        }
        .qubed-tree-view summary:hover,span.leaf:hover {
            background-color: #f0f0f0;
        }
        .qubed-tree-view details > summary::after {
            content: ' ▲';
        }
        .qubed-tree-view details:not([open]) > summary::after {
            content: " ▼";
        }
        </style>
        """
    fragments = _node_tree_to_html(node=node, depth=depth, **kwargs)
    markup = "".join(fragments)
    return stylesheet + "<pre class='qubed-tree-view'>" + markup + "</pre>"
--- a/tree_compresser/python_src/tree_traverser/trie.py
+++ b/tree_compresser/python_src/tree_traverser/trie.py
@ -0,0 +1,40 @@
 from dataclasses import dataclass, field
 # Alias used to annotate the single-character edge labels of the trie.
 character = str
@dataclass(unsafe_hash=True)
class TrieNode():
    """
    A node of a trie, linked to its parent and to its children by character.

    Equality and hashing use only ``parent`` and ``parent_char``, i.e. the
    path from the root. ``children`` is excluded because a dict is not
    hashable and because comparing it would recurse back through each
    child's ``parent`` link.
    """
    parent: "TrieNode | None"
    parent_char: "character"
    children: "dict[character, TrieNode]" = field(default_factory=dict, compare=False)
@dataclass
class Trie:
    """
    A character trie that hands out an integer id for every inserted word.

    ``reverse_lookup`` maps ``id()`` of a word's final node back to that node.
    """
    root: TrieNode = field(default_factory=lambda: TrieNode(None, ""))
    reverse_lookup: dict[int, TrieNode] = field(default_factory=dict)

    def insert(self, word: str):
        """Insert ``word`` and return the id of the node reached by its last character."""
        current = self.root
        for char in word:
            child = current.children.get(char)
            if child is None:
                child = TrieNode(current, char)
                current.children[char] = child
            current = child

        n_id = id(current)
        self.reverse_lookup.setdefault(n_id, current)
        return n_id

    def lookup_by_id(self, n_id: int):
        """Rebuild the word for ``n_id`` by walking parent links up to the root."""
        node = self.reverse_lookup[n_id]
        chars = []
        while node.parent is not None:
            chars.append(node.parent_char)
            node = node.parent
        chars.reverse()
        return "".join(chars)
--- a/tree_compresser/python_src/tree_traverser/value_types.py
+++ b/tree_compresser/python_src/tree_traverser/value_types.py
@ -0,0 +1,214 @@
 import dataclasses
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from datetime import date, datetime, timedelta
 from typing import Any, Iterable, Literal
@dataclass(frozen=True)
class Values(ABC):
    """Abstract base class for the set of values attached to a key."""

    @abstractmethod
    def summary(self) -> str:
        """Return a short string describing the values."""
        ...

    @abstractmethod
    def __len__(self) -> int:
        """Return the number of values."""
        ...

    @abstractmethod
    def __contains__(self, value: Any) -> bool:
        """Return whether ``value`` is one of the values."""
        ...

    @abstractmethod
    def from_strings(self, values: Iterable[str]) -> list['Values']:
        """Build one or more ``Values`` objects from strings."""
        ...
@dataclass(frozen=True, order=True)
class Enum(Values):
    """
    The simplest kind of key value is just a list of strings.
    summary -> string1/string2/string....
    """
    values: tuple[Any, ...]

    def __post_init__(self):
        assert isinstance(self.values, tuple)

    def __iter__(self):
        yield from self.values

    def __len__(self) -> int:
        return len(self.values)

    def summary(self) -> str:
        """Return the sorted values joined with "/"."""
        ordered = sorted(self.values)
        return '/'.join(str(v) for v in ordered)

    def __contains__(self, value: Any) -> bool:
        return value in self.values

    def from_strings(self, values: Iterable[str]) -> list['Values']:
        """Wrap ``values`` in a single ``Enum``."""
        members = tuple(values)
        return [Enum(members)]
@dataclass(frozen=True)
class Range(Values, ABC):
    """Abstract base for range-like values; ``dtype`` is a keyword-only tag field."""
    dtype: str = dataclasses.field(kw_only=True)
@dataclass(frozen=True)
class DateRange(Range):
    """
    An inclusive range of dates from ``start`` to ``end`` in increments of ``step``.

    A single date is represented with ``start == end`` and a zero ``step``.
    """
    start: date
    end: date
    step: timedelta
    dtype: Literal["date"] = dataclasses.field(kw_only=True, default="date")

    @classmethod
    def from_strings(cls, values: Iterable[str]) -> list['DateRange']:
        """
        Group ``YYYYMMDD`` strings into ranges of consecutive days.

        Returns one range per run of consecutive dates, in ascending order.
        Runs of length one get a zero step. Duplicate dates are ignored and
        empty input yields an empty list.
        """
        # Store plain dates (not datetimes) so they compare with the dates
        # parsed in __contains__
        dates = sorted(datetime.strptime(v, "%Y%m%d").date() for v in values)
        if not dates:
            return []

        one_day = timedelta(days=1)

        def make(first: date, last: date) -> 'DateRange':
            step = timedelta(days=0) if first == last else one_day
            return cls(start=first, end=last, step=step)

        ranges = []
        run_start = run_end = dates[0]
        for current in dates[1:]:
            if current == run_end:
                continue # duplicate
            if current - run_end == one_day:
                run_end = current
                continue
            ranges.append(make(run_start, run_end))
            run_start = run_end = current

        # Flush the final run, which the loop above never closes
        ranges.append(make(run_start, run_end))
        return ranges

    def __contains__(self, value: Any) -> bool:
        """Return whether the ``YYYYMMDD`` string ``value`` falls on a step of this range."""
        try:
            v = datetime.strptime(value, "%Y%m%d").date()
        except (TypeError, ValueError):
            # Not a date string, so it cannot be a member
            return False

        if not (self.start <= v <= self.end):
            return False
        if not self.step:
            # Zero step: the range holds a single date, avoid dividing by zero
            return v == self.start
        # timedelta % timedelta is a timedelta, so compare against a zero timedelta
        return (v - self.start) % self.step == timedelta(0)

    def __len__(self) -> int:
        """Return the number of dates in the range, counting both ends."""
        if not self.step:
            return 1
        return (self.end - self.start) // self.step + 1

    def summary(self) -> str:
        """Return ``start``, ``start/to/end`` or ``start/to/end/by/days``."""
        def fmt(d): return d.strftime("%Y%m%d")
        if self.step == timedelta(days=0):
            return f"{fmt(self.start)}"
        if self.step == timedelta(days=1):
            return f"{fmt(self.start)}/to/{fmt(self.end)}"

        return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step // timedelta(days=1)}"
@dataclass(frozen=True)
class TimeRange(Range):
    """An inclusive, evenly spaced range of integer-encoded times.

    A ``step`` of ``0`` marks a range that holds the single value ``start``.
    """

    start: int
    end: int
    step: int
    dtype: Literal["time"] = dataclasses.field(kw_only=True, default="time")

    @classmethod
    def from_strings(cls, values: Iterable[str]) -> list['TimeRange']:
        """Group integer strings into runs of consecutive values.

        Args:
            values: Strings parseable by ``int``, in any order. Duplicates
                are collapsed.

        Returns:
            One range per maximal run, in ascending order. Isolated values
            become ranges with a zero step. An empty input yields an empty
            list.

        Raises:
            ValueError: If a string is not a valid integer.
        """
        # Materialise first: ``values`` is only promised to be an Iterable,
        # so ``len()`` on it is not safe.
        times = sorted({int(v) for v in values})
        if not times:
            return []

        def build(first: int, last: int) -> 'TimeRange':
            # Singles use step 0 everywhere so that summary() prints them
            # as a bare value.
            return cls(start=first, end=last, step=1 if last != first else 0)

        ranges = []
        run_start = run_end = times[0]
        for current in times[1:]:
            # NOTE(review): adjacency means the integers differ by exactly 1;
            # confirm this is the intended spacing for the time encoding.
            if current - run_end == 1:
                run_end = current
            else:
                ranges.append(build(run_start, run_end))
                run_start = run_end = current
        # Flush the final run; without this the trailing values are lost.
        ranges.append(build(run_start, run_end))
        return ranges

    def __len__(self) -> int:
        """Return the number of values in the range, both ends included."""
        if self.step == 0:
            return 1
        return (self.end - self.start) // self.step + 1

    def summary(self) -> str:
        """Render the range as ``start`` or ``start/to/end/by/step``."""
        def fmt(d):
            return f"{d:04d}"

        if self.step == 0:
            return f"{fmt(self.start)}"
        return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"

    def __contains__(self, value: Any) -> bool:
        """Return whether ``int(value)`` lies on the range.

        Raises:
            ValueError: If ``value`` cannot be converted with ``int``.
        """
        v = int(value)
        if not self.start <= v <= self.end:
            return False
        if self.step == 0:
            # Zero step: avoid the modulo and match only the stored value.
            return v == self.start
        return (v - self.start) % self.step == 0
@dataclass(frozen=True)
class IntRange(Range):
    """An inclusive, evenly spaced range of integers.

    A ``step`` of ``0`` marks a range that holds the single value ``start``.
    """

    start: int
    end: int
    step: int
    dtype: Literal["int"] = dataclasses.field(kw_only=True, default="int")

    def __len__(self) -> int:
        """Return the number of integers in the range, both ends included."""
        if self.step == 0:
            return 1
        return (self.end - self.start) // self.step + 1

    def summary(self) -> str:
        """Render the range as ``start`` or ``start/to/end/by/step``."""
        # The bounds are ints, so they are formatted directly; date-style
        # formatting does not apply to them.
        if self.step == 0:
            return f"{self.start}"
        return f"{self.start}/to/{self.end}/by/{self.step}"

    def __contains__(self, value: Any) -> bool:
        """Return whether ``int(value)`` lies on the range.

        Raises:
            ValueError: If ``value`` cannot be converted with ``int``.
        """
        v = int(value)
        if not self.start <= v <= self.end:
            return False
        if self.step == 0:
            # Zero step: avoid the modulo and match only the stored value.
            return v == self.start
        return (v - self.start) % self.step == 0

    @classmethod
    def from_strings(cls, values: Iterable[str]) -> list['IntRange']:
        """Group integer strings into runs of consecutive integers.

        Args:
            values: Strings parseable by ``int``, in any order. Duplicates
                are collapsed.

        Returns:
            One range per maximal run, in ascending order. Isolated values
            become ranges with a zero step. An empty input yields an empty
            list.

        Raises:
            ValueError: If a string is not a valid integer.
        """
        # Materialise first: ``values`` is only promised to be an Iterable,
        # so ``len()`` on it is not safe.
        ints = sorted({int(v) for v in values})
        if not ints:
            return []

        def build(first: int, last: int) -> 'IntRange':
            return cls(start=first, end=last, step=1 if last != first else 0)

        ranges = []
        run_start = run_end = ints[0]
        for current in ints[1:]:
            if current - run_end == 1:
                run_end = current
            else:
                ranges.append(build(run_start, run_end))
                run_start = run_end = current
        # Flush the final run; without this the trailing values are lost.
        ranges.append(build(run_start, run_end))
        return ranges
--- a/tree_compresser/tests/open_climate_dt.py
+++ b/tree_compresser/tests/open_climate_dt.py
@ -3,15 +3,15 @@ from pathlib import Path
 from tree_traverser import CompressedTree
-data_path = Path("/home/eouser/qubed/config/climate-dt/compressed_tree.json")
+data_path = Path("./config/climate-dt/compressed_tree.json")
 # Print size of file
 print(f"climate dt compressed tree: {data_path.stat().st_size // 1e6:.1f} MB")
 print("Opening json file")
 compressed_tree = CompressedTree.load(data_path)
-print(compressed_tree.reconstruct_compressed_ecmwf_style())
+print(compressed_tree.to_json())
-# print("Outputting compressed tree ecmwf style")
+print("Outputting compressed tree ecmwf style")
-# with open("data/compressed_tree_climate_dt_ecmwf_style.json", "w") as f:
+with open("config/climate-dt/new_format.json", "w") as f:
-#     json.dump(compressed_tree.reconstruct_compressed_ecmwf_style(), f)
+    json.dump(compressed_tree.to_json(), f)