Add fiab docs

2025-02-20 15:51:02 +00:00 · 2025-02-20 15:51:02 +00:00 · 4c941d34f8
commit 4c941d34f8
parent 11516a05ba
7 changed files with 179 additions and 36 deletions
--- a/docs/fiab.md
+++ b/docs/fiab.md
@ -0,0 +1,103 @@
 ---
 jupytext:
  text_representation:
    extension: .md
    format_name: myst
    format_version: 0.13
    jupytext_version: 1.16.4
 ---
 # Fiab
 ## Model Selection
 This is a demo of using qubed to select from a set of forecast models that each produce a set of output variables.
 First let's construct some models represented as qubes:
 ```{code-cell} python3
 from qubed import Qube
 model_1 = Qube.from_datacube({
        "levtype": "pl",
        "param" : ["q", "t", "u", "v", "w", "z"],
        "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
    }) | Qube.from_datacube({
        "levtype": "sfc",
        "param" : ["10u", "10v", "2d", "2t", "cp", "msl", "skt", "sp", "tcw", "tp"],
 })
 model_1 = "model=1" / ("frequency=6h" / model_1)
 model_1
 ```
 This is the most complete model. Now let's define a second model that produces fewer variables on the same set of pressure levels:
 ```{code-cell} python3
 model_2 = Qube.from_datacube({
        "levtype": "pl",
        "param" : ["q", "t"],
        "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
    }) | Qube.from_datacube({
        "levtype": "sfc",
        "param" : ["2t", "cp", "msl"],
 })
 model_2 = "model=2" / ("frequency=continuous" / model_2)
 ```
 ```{code-cell} python3
 model_3 = Qube.from_datacube({
        "levtype": "pl",
        "param" : ["q", "t"],
        "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
    }) | Qube.from_datacube({
        "levtype": "sfc",
        "param" : ["2t", "cp", "msl"],
 })
 model_3 = "model=3" / ("frequency=6h" / model_3)
 model_3
 ```
 Now we can combine the three models into a single qube:
 ```{code-cell} python3
 all_models = model_1 | model_2 | model_3
 all_models
 ```
 Now we can perform queries over the models. We can get all models that produce 2m temperature:
 ```{code-cell} python3
 all_models.select({
    "param" : "2t",
 })
 ```
 Filter on both parameter and frequency:
 ```{code-cell} python3
 all_models.select({
    "param" : "2t",
    "frequency": "continuous",
 })
 ```
 Find all models that have some overlap with this set of parameters:
 ```{code-cell} python3
 all_models.select({
    "param" : ["q", "t", "u", "v"],
 })
 ```
 ## Choosing a set of models based on the requested parameter set
 ```{code-cell} python3
 all_models.select({
    "param" : ["q", "t", "u", "v"],
    "frequency": "6h",
 })
 ```
 <!-- ## Choosing the fewest models needed to cover the requested parameter set -->
 <!-- ```{code-cell} python3 -->
--- a/docs/index.md
+++ b/docs/index.md
@ -16,6 +16,7 @@ quickstart.md
 api.md
 development.md
 algorithms.md
 fiab.md
 ```
 Qubed provides a data structure called a Qube, which represents sets of data identified by multiple key-value pairs as a tree of datacubes. To understand what that means, go to [Background](background.md); to just start using the library, skip straight to the [Quickstart](quickstart.md).
--- a/fiab/extract.py
+++ b/fiab/extract.py
@ -1,4 +1,3 @@
 import json
 from collections import defaultdict
@ -8,26 +7,36 @@ predicted_indices = [*metadata['data_indices']['data']['output']['prognostic'],
 variables = metadata['dataset']["variables"]
 variables = [variables[i] for i in predicted_indices]
-print('Variables:', variables)
+# print('Raw Model Variables:', variables)
 # Split variables between pressure and surface
 surface_variables = [v for v in variables if '_' not in v]
 pressure_level_variables = [v for v in variables if '_' in v]
-pressure_levels = sorted(set([v.split('_')[-1] for v in pressure_level_variables]))
+# Collect the levels for each pressure variable
-pressure_level_variables = sorted(set([v.split('_')[0] for v in pressure_level_variables]))
+level_variables = defaultdict(list)
 levels_for_variables = defaultdict(list)
 for v in variables:
-    if "_" in v:
+    if '_' in v:
-        variable, level = v.split('_')
+        variable, level = v.split("_")
-        levels_for_variables[variable].append(level)
+        level_variables[variable].append(int(level))
-print('Levels for variables:', levels_for_variables)
+# print(level_variables)
-print('Pressure level variables:', pressure_level_variables)
+# Use qubed library to construct tree
-print('Pressure levels:', sorted([int(p) for p in pressure_levels]))
+from qubed import Qube
-print('Surface variables:', surface_variables)
+model_tree = Qube.empty()
-frequency = metadata['config']['data']['frequency']
+for variable, levels in level_variables.items():
-print("Frequency:", frequency)
+    model_tree = model_tree | Qube.from_datacube({
        "levtype": "pl",
        "param" : variable,
        "level" : levels,
    })
 for variable in surface_variables:
    model_tree = model_tree | Qube.from_datacube({
        "levtype": "sfc",
        "param" : variable,
    })
 print(model_tree.to_json())
--- a/src/python/qubed/Qube.py
+++ b/src/python/qubed/Qube.py
@ -66,11 +66,21 @@ class Qube:
            return Qube.make(
                key=json["key"],
                values=values_from_json(json["values"]),
-                metadata=json["metadata"] if "metadata" in json else {},
+                metadata=frozendict(json["metadata"]) if "metadata" in json else {},
                children=(from_json(c) for c in json["children"]),
            )
        return from_json(json)
    def to_json(self) -> dict:
        """Convert this qube and all of its descendants into nested dicts.

        Every node becomes a dict with the entries ``"key"``, ``"values"``
        (whatever ``node.values.to_json()`` returns), ``"metadata"`` (copied
        into a plain ``dict``) and ``"children"`` (a list holding the child
        nodes converted in the same way).
        """
        def serialise(node: "Qube") -> dict:
            encoded = {
                "key": node.key,
                "values": node.values.to_json(),
                "metadata": dict(node.metadata),
            }
            # Recurse last so the entries keep the key/values/metadata/children order.
            encoded["children"] = [serialise(child) for child in node.children]
            return encoded

        return serialise(self)
    @classmethod
    def from_dict(cls, d: dict) -> 'Qube':
        def from_dict(d: dict) -> list[Qube]:
@ -102,6 +112,12 @@ class Qube:
    def _repr_html_(self) -> str:
        """Return the HTML produced by ``node_tree_to_html`` for this qube.

        The tree is rendered with ``depth=2`` and ``collapse=True``.
        """
        render_options = {"depth": 2, "collapse": True}
        return node_tree_to_html(self, **render_options)
    # Allow "key=value/value" / qube to prepend keys
    def __rtruediv__(self, other: str) -> "Qube":
        """Prepend a key to this qube via ``"key=value/value" / qube``.

        ``other`` is split at its first ``=`` into a key and a ``/``-separated
        list of values. The result is a root node with a single child that
        carries that key and those values and takes over this qube's children.

        Raises:
            ValueError: if ``other`` does not contain ``=``.
        """
        # Follow the reflected-operator protocol: let Python raise TypeError
        # for operands this method does not understand.
        if not isinstance(other, str):
            return NotImplemented

        # Split on the first "=" only, so values may themselves contain "=".
        key, separator, raw_values = other.partition("=")
        if not separator:
            raise ValueError(
                f"Expected a string of the form 'key=value/value', got {other!r}"
            )

        values = QEnum(raw_values.split("/"))
        return Qube.root_node([Qube.make(key, values, self.children)])
    def __or__(self, other: "Qube") -> "Qube":
        """Combine this qube with ``other`` using the UNION set operation."""
        union = set_operations.SetOperation.UNION
        return set_operations.operation(self, other, union, type(self))
@ -176,7 +192,7 @@ class Qube:
        return dataclasses.replace(self, children = children)
-    def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed") -> 'Qube':
+    def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed", prune=True) -> 'Qube':
        # make all values lists
        selection = {k : v if isinstance(v, list) else [v] for k,v in selection.items()}
@ -187,7 +203,15 @@ class Qube:
            if node.key not in selection: 
                if mode == "strict":
                    return None
-                return dataclasses.replace(node, children = not_none(select(c) for c in node.children))
+                
                new_children = not_none(select(c) for c in node.children)
                # If prune is True, drop any non-leaf node whose
                # children have all been removed by the selection
                if prune and node.children and not new_children:
                    return None
                return dataclasses.replace(node, children = new_children)
            # If the key is specified, check if any of the values match
            values = QEnum((c for c in selection[node.key] if c in node.values))
--- a/src/python/qubed/value_types.py
+++ b/src/python/qubed/value_types.py
@ -30,6 +30,10 @@ class Values(ABC):
    def min(self):
        pass
    @abstractmethod
    def to_json(self):
        """Return these values in the form used for JSON output.

        Abstract: concrete subclasses supply the implementation.
        """
 T = TypeVar("T")
 EnumValuesType = FrozenSet[T]
@dataclass(frozen=True, order=True)
@ -61,6 +65,8 @@ class QEnum(Values):
        return [type(self)(tuple(values))]
    def min(self):
        return min(self.values)
    def to_json(self):
        """Return the elements of ``self.values`` as a new list."""
        return [*self.values]
@dataclass(frozen=True)
 class Range(Values, ABC):
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@ -0,0 +1,16 @@
 from qubed import Qube
 def test_json_round_trip():
    """A qube converted with to_json and rebuilt with from_json equals the original."""
    tree = {
        "class=d1" : {
            "dataset=climate-dt/weather-dt" : {
                "generation=1/2/3/4" : {},
            },
            "dataset=another-value" : {
                "generation=1/2/3" : {},
            },
        }
    }
    original = Qube.from_dict(tree)
    serialised = original.to_json()
    restored = Qube.from_json(serialised)
    assert restored == original
--- a/tests/test_iteration.py
+++ b/tests/test_iteration.py
@ -16,20 +16,4 @@ def test_iter_leaves_simple():
        {"a" : '2', "b" : '2'},
    ]
-    assert set(make_hashable(q.leaves())) == set(make_hashable(entries))
+    assert set(make_hashable(q.leaves())) == set(make_hashable(entries))
 # def test_iter_leaves():
 #     d = {
 #         "class=od" : {
 #             "expver=0001": {"param=1":{}, "param=2":{}},
 #             "expver=0002": {"param=1":{}, "param=2":{}},
 #         },
 #         "class=rd" : {
 #             "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
 #             "expver=0002": {"param=1":{}, "param=2":{}},
 #         },
 #     }
 #     q = Qube.from_dict(d)
 #     r = Qube.from_dict(d)
 #     assert q == r