From 4bcb09180e0b16d7d54eaab12798582a306679b5 Mon Sep 17 00:00:00 2001 From: Tom Date: Wed, 19 Feb 2025 14:17:47 +0000 Subject: [PATCH] Add .datacubes() --- ROADMAP.md | 1 + docs/cmd.md | 10 +++++++-- docs/quickstart.md | 6 +----- src/python/qubed/Qube.py | 33 +++++++++++++++++++---------- src/python/qubed/tree_formatters.py | 2 +- tests/test_basic_operations.py | 6 ++++++ 6 files changed, 39 insertions(+), 19 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 7a06d13..49fd82f 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -6,6 +6,7 @@ Initial Python Implementation [x] Query with request [x] Iteration over leaves [x] Iteration over datacubes +[x] Command line creation from fdb list --compact [ ] Set up periodic updates to climate-dt/extremes-dt again [ ] Maybe also do production db? [ ] Do mars list to contraints conversion diff --git a/docs/cmd.md b/docs/cmd.md index 469738f..7197cdd 100644 --- a/docs/cmd.md +++ b/docs/cmd.md @@ -1,4 +1,4 @@ -### Command Line Usage +# Command Line Usage ```bash fdb list class=rd,expver=0001,... | qubed --from=fdblist --to=text @@ -18,4 +18,10 @@ fdb list class=rd,expver=0001,... | qubed --from=fdblist --to=text * `datacubes` * `constraints` -use `--input` and `--output` to specify input and output files respectively. \ No newline at end of file +use `--input` and `--output` to specify input and output files respectively. + + +There's some handy test data in the `tests/data` directory. 
For example: +```bash +gzip -dc tests/data/fdb_list_compact.gz | qubed --from=fdblist +``` \ No newline at end of file diff --git a/docs/quickstart.md b/docs/quickstart.md index ecf19ce..82a988f 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -79,11 +79,7 @@ for i, identifier in enumerate(cq.leaves()): Iterate over the datacubes: ```{code-cell} python3 -for i, datacube in enumerate(cq.datacubes()): - print(datacube) - if i > 10: - print("...") - break +cq.datacubes() ``` ### A Real World Example diff --git a/src/python/qubed/Qube.py b/src/python/qubed/Qube.py index 1e59010..34616a5 100644 --- a/src/python/qubed/Qube.py +++ b/src/python/qubed/Qube.py @@ -43,6 +43,10 @@ class Qube: )), ) + @classmethod + def root_node(cls, children: Iterable["Qube"]) -> 'Qube': + return cls.make("root", QEnum(("root",)), children) + @classmethod def from_datacube(cls, datacube: dict[str, str | Sequence[str]]) -> 'Qube': key_vals = list(datacube.items())[::-1] @@ -53,7 +57,7 @@ class Qube: values = [values] children = [cls.make(key, QEnum(values), children)] - return cls.make("root", QEnum(("root",)), children) + return cls.root_node(children) @classmethod @@ -77,13 +81,11 @@ class Qube: children=from_dict(children) ) for k, children in d.items()] - return Qube.make(key = "root", - values=QEnum(("root",)), - children = from_dict(d)) + return Qube.root_node(from_dict(d)) @classmethod def empty(cls) -> 'Qube': - return cls.make("root", QEnum(("root",)), []) + return Qube.root_node([]) def __str__(self, depth = None, name = None) -> str: @@ -93,8 +95,9 @@ class Qube: def print(self, depth = None, name: str | None = None): print(self.__str__(depth = depth, name = name)) - def html(self, depth = 2, collapse = True) -> HTML: - return HTML(node_tree_to_html(self, depth = depth, collapse = collapse)) + def html(self, depth = 2, collapse = True, name: str | None = None) -> HTML: + node = dataclasses.replace(self, data = RootNodeData(key = name, values=self.values, 
metadata=self.metadata)) if name is not None else self + return HTML(node_tree_to_html(node=node, depth = depth, collapse = collapse)) def _repr_html_(self) -> str: return node_tree_to_html(self, depth = 2, collapse = True) @@ -122,11 +125,18 @@ class Qube: else: yield leaf - def datacubes(self): - def to_list_of_cubes(node: Qube) -> list[list[Qube]]: - return [[node] + sub_cube for c in node.children for sub_cube in to_list_of_cubes(c)] + def datacubes(self) -> "Qube": + def to_list_of_cubes(node: Qube) -> Iterable[Qube]: + if not node.children: + yield node + # print(node.key) + for c in node.children: + # print(c) + for sub_cube in to_list_of_cubes(c): + yield dataclasses.replace(node, children=[sub_cube]) + - return to_list_of_cubes(self) + return Qube.root_node((q for c in self.children for q in to_list_of_cubes(c))) def __getitem__(self, args) -> 'Qube': key, value = args @@ -144,6 +154,7 @@ class Qube: @cached_property def n_nodes(self) -> int: + if self.key == "root" and not self.children: return 0 return 1 + sum(c.n_nodes for c in self.children) def transform(self, func: 'Callable[[Qube], Qube | list[Qube]]') -> 'Qube': diff --git a/src/python/qubed/tree_formatters.py b/src/python/qubed/tree_formatters.py index f99429e..836ab38 100644 --- a/src/python/qubed/tree_formatters.py +++ b/src/python/qubed/tree_formatters.py @@ -8,7 +8,7 @@ class TreeLike(Protocol): @property def children(self) -> Sequence["TreeLike"]: ... # Supports indexing like node.children[i] - def summary(self, **kwargs) -> str: ... + def summary(self) -> str: ... 
@dataclass(frozen=True) class HTML(): diff --git a/tests/test_basic_operations.py b/tests/test_basic_operations.py index c528e4d..67f4698 100644 --- a/tests/test_basic_operations.py +++ b/tests/test_basic_operations.py @@ -26,6 +26,12 @@ def test_n_leaves(): # Size is 3*3*3 + 1*1*1 = 27 + 1 assert q.n_leaves == 27 + 1 +def test_n_leaves_empty(): + assert Qube.empty().n_leaves == 0 + +def test_n_nodes_empty(): + assert Qube.empty().n_nodes == 0 + def test_union(): q = Qube.from_dict({"a=1/2/3" : {"b=1" : {}},})