From 4c941d34f8013f33ff3bc6d70af04a7242370356 Mon Sep 17 00:00:00 2001
From: Tom <thomas.hodson@ecmwf.int>
Date: Thu, 20 Feb 2025 15:51:02 +0000
Subject: [PATCH] Add fiab docs

---
 docs/fiab.md                    | 103 ++++++++++++++++++++++++++++++++
 docs/index.md                   |   1 +
 fiab/extract.py                 |  41 ++++++++-----
 src/python/qubed/Qube.py        |  30 +++++++++-
 src/python/qubed/value_types.py |   6 ++
 tests/test_conversions.py       |  16 +++++
 tests/test_iteration.py         |  18 +-----
 7 files changed, 179 insertions(+), 36 deletions(-)
 create mode 100644 docs/fiab.md
 create mode 100644 tests/test_conversions.py

diff --git a/docs/fiab.md b/docs/fiab.md
new file mode 100644
index 0000000..bd26297
--- /dev/null
+++ b/docs/fiab.md
@@ -0,0 +1,103 @@
+---
+jupytext:
+  text_representation:
+    extension: .md
+    format_name: myst
+    format_version: 0.13
+    jupytext_version: 1.16.4
+---
+
+# Fiab
+
+## Model Selection
+
+This is a demo of using qubed to select from a set of forecast models that each produce a set of output variables.
+
+First let's construct some models represented as qubes:
+
+```{code-cell} python3
+from qubed import Qube
+model_1 = Qube.from_datacube({
+        "levtype": "pl",
+        "param" : ["q", "t", "u", "v", "w", "z"],
+        "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
+    }) | Qube.from_datacube({
+        "levtype": "sfc",
+        "param" : ["10u", "10v", "2d", "2t", "cp", "msl", "skt", "sp", "tcw", "tp"],
+})
+
+model_1 = "model=1" / ("frequency=6h" / model_1)
+model_1
+```
+
+This is the most complete model. Now let's define a second model that produces fewer variables and runs at a different frequency:
+
+```{code-cell} python3
+model_2 = Qube.from_datacube({
+        "levtype": "pl",
+        "param" : ["q", "t"],
+        "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
+    }) | Qube.from_datacube({
+        "levtype": "sfc",
+        "param" : ["2t", "cp", "msl"],
+})
+model_2 = "model=2" / ("frequency=continuous" / model_2)
+```
+
+```{code-cell} python3
+model_3 = Qube.from_datacube({
+        "levtype": "pl",
+        "param" : ["q", "t"],
+        "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
+    }) | Qube.from_datacube({
+        "levtype": "sfc",
+        "param" : ["2t", "cp", "msl"],
+})
+model_3 = "model=3" / ("frequency=6h" / model_3)
+model_3
+```
+
+
+Now we can combine the three models into a single qube:
+
+```{code-cell} python3
+all_models = model_1 | model_2 | model_3
+all_models
+```
+
+Now we can perform queries over the models. We can get all models that produce 2m temperature:
+```{code-cell} python3
+all_models.select({
+    "param" : "2t",
+})
+```
+
+Filter on both parameter and frequency:
+
+```{code-cell} python3
+all_models.select({
+    "param" : "2t",
+    "frequency": "continuous",
+})
+```
+
+Find all models that have some overlap with this set of parameters:
+
+```{code-cell} python3
+all_models.select({
+    "param" : ["q", "t", "u", "v"],
+})
+```
+
+## Choosing a set of models based on the requested parameter set
+
+```{code-cell} python3
+all_models.select({
+    "param" : ["q", "t", "u", "v"],
+    "frequency": "6h",
+})
+```
+
+<!-- ## Choosing the fewest models needed to cover the requested parameter set -->
+
+<!-- ```{code-cell} python3 -->
diff --git a/docs/index.md b/docs/index.md
index 628a5f1..53398e0 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -16,6 +16,7 @@ quickstart.md
 api.md
 development.md
 algorithms.md
+fiab.md
 ```
 
 Qubed provides a datastructure called a Qube which represents sets of data identified by multiple key value pairs as a tree of datacubes. To understand what that means go to [Background](background.md), to just start using the library skip straight to the [Quickstart](quickstart.md).
diff --git a/fiab/extract.py b/fiab/extract.py
index 232747d..089a74e 100644
--- a/fiab/extract.py
+++ b/fiab/extract.py
@@ -1,4 +1,3 @@
-
 import json
 from collections import defaultdict
 
@@ -8,26 +7,36 @@ predicted_indices = [*metadata['data_indices']['data']['output']['prognostic'],
 variables = metadata['dataset']["variables"]
 variables = [variables[i] for i in predicted_indices]
 
-print('Variables:', variables)
+# print('Raw Model Variables:', variables)
 
+# Split variables between pressure and surface
 surface_variables = [v for v in variables if '_' not in v]
-pressure_level_variables = [v for v in variables if '_' in v]
 
-pressure_levels = sorted(set([v.split('_')[-1] for v in pressure_level_variables]))
-pressure_level_variables = sorted(set([v.split('_')[0] for v in pressure_level_variables]))
-
-levels_for_variables = defaultdict(list)
+# Collect the levels for each pressure variable
+level_variables = defaultdict(list)
 for v in variables:
-    if "_" in v:
-        variable, level = v.split('_')
-        levels_for_variables[variable].append(level)
+    if '_' in v:
+        variable, level = v.split("_")
+        level_variables[variable].append(int(level))
 
-print('Levels for variables:', levels_for_variables)
+# print(level_variables)
 
-print('Pressure level variables:', pressure_level_variables)
-print('Pressure levels:', sorted([int(p) for p in pressure_levels]))
+# Use the qubed library to construct the tree
+from qubed import Qube
 
-print('Surface variables:', surface_variables)
+model_tree = Qube.empty()
 
-frequency = metadata['config']['data']['frequency']
-print("Frequency:", frequency)
\ No newline at end of file
+for variable, levels in level_variables.items():
+    model_tree = model_tree | Qube.from_datacube({
+        "levtype": "pl",
+        "param" : variable,
+        "level" : levels,
+    })
+
+for variable in surface_variables:
+    model_tree = model_tree | Qube.from_datacube({
+        "levtype": "sfc",
+        "param" : variable,
+    })
+
+print(model_tree.to_json())
\ No newline at end of file
diff --git a/src/python/qubed/Qube.py b/src/python/qubed/Qube.py
index 34616a5..b649629 100644
--- a/src/python/qubed/Qube.py
+++ b/src/python/qubed/Qube.py
@@ -66,11 +66,21 @@ class Qube:
             return Qube.make(
                 key=json["key"],
                 values=values_from_json(json["values"]),
-                metadata=json["metadata"] if "metadata" in json else {},
+                metadata=frozendict(json["metadata"]) if "metadata" in json else {},
                 children=(from_json(c) for c in json["children"]),
             )
         return from_json(json)
     
+    def to_json(self) -> dict:
+        def to_json(node: Qube) -> dict:
+            return {
+                "key": node.key,
+                "values": node.values.to_json(),
+                "metadata": dict(node.metadata),
+                "children": [to_json(c) for c in node.children]
+            }
+        return to_json(self)
+    
     @classmethod
     def from_dict(cls, d: dict) -> 'Qube':
         def from_dict(d: dict) -> list[Qube]:
@@ -102,6 +112,12 @@ class Qube:
     def _repr_html_(self) -> str:
         return node_tree_to_html(self, depth = 2, collapse = True)
     
+    # Support the syntax "key=value1/value2" / qube, which prepends a key to a qube
+    def __rtruediv__(self, other: str) -> "Qube":
+        key, values = other.split("=")
+        values = QEnum((values.split("/")))
+        return Qube.root_node([Qube.make(key, values, self.children)])
+    
     def __or__(self, other: "Qube") -> "Qube":
         return set_operations.operation(self, other, set_operations.SetOperation.UNION, type(self))
     
@@ -176,7 +192,7 @@ class Qube:
         return dataclasses.replace(self, children = children)
 
     
-    def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed") -> 'Qube':
+    def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed", prune=True) -> 'Qube':
         # make all values lists
         selection = {k : v if isinstance(v, list) else [v] for k,v in selection.items()}
 
@@ -187,7 +203,15 @@ class Qube:
             if node.key not in selection: 
                 if mode == "strict":
                     return None
-                return dataclasses.replace(node, children = not_none(select(c) for c in node.children))
+                
+                new_children = not_none(select(c) for c in node.children)
+                
+                # If prune is True, remove any non-leaf nodes
+                # that have had all of their children removed
+                if prune and node.children and not new_children:
+                    return None
+                
+                return dataclasses.replace(node, children = new_children)
             
             # If the key is specified, check if any of the values match
             values = QEnum((c for c in selection[node.key] if c in node.values))
diff --git a/src/python/qubed/value_types.py b/src/python/qubed/value_types.py
index 816cb2a..2985d41 100644
--- a/src/python/qubed/value_types.py
+++ b/src/python/qubed/value_types.py
@@ -30,6 +30,10 @@ class Values(ABC):
     def min(self):
         pass
 
+    @abstractmethod
+    def to_json(self):
+        pass
+
 T = TypeVar("T")
 EnumValuesType = FrozenSet[T]
 @dataclass(frozen=True, order=True)
@@ -61,6 +65,8 @@ class QEnum(Values):
         return [type(self)(tuple(values))]
     def min(self):
         return min(self.values)
+    def to_json(self):
+        return list(self.values)
 
 @dataclass(frozen=True)
 class Range(Values, ABC):
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
new file mode 100644
index 0000000..4d96cfd
--- /dev/null
+++ b/tests/test_conversions.py
@@ -0,0 +1,16 @@
+from qubed import Qube
+
+
+def test_json_round_trip():
+    u = Qube.from_dict({
+        "class=d1" : {
+            "dataset=climate-dt/weather-dt" : {
+                "generation=1/2/3/4" : {},
+            },
+            "dataset=another-value" : {
+                "generation=1/2/3" : {},
+            },
+        }
+    })
+    json = u.to_json()
+    assert Qube.from_json(json) == u
\ No newline at end of file
diff --git a/tests/test_iteration.py b/tests/test_iteration.py
index ae7f881..a997bf6 100644
--- a/tests/test_iteration.py
+++ b/tests/test_iteration.py
@@ -16,20 +16,4 @@ def test_iter_leaves_simple():
         {"a" : '2', "b" : '2'},
     ]
 
-    assert set(make_hashable(q.leaves())) == set(make_hashable(entries))
-
-# def test_iter_leaves():
-#     d = {
-#         "class=od" : {
-#             "expver=0001": {"param=1":{}, "param=2":{}},
-#             "expver=0002": {"param=1":{}, "param=2":{}},
-#         },
-#         "class=rd" : {
-#             "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
-#             "expver=0002": {"param=1":{}, "param=2":{}},
-#         },
-#     }
-#     q = Qube.from_dict(d)
-#     r = Qube.from_dict(d)
-
-#     assert q == r
\ No newline at end of file
+    assert set(make_hashable(q.leaves())) == set(make_hashable(entries))
\ No newline at end of file