From 4c941d34f8013f33ff3bc6d70af04a7242370356 Mon Sep 17 00:00:00 2001 From: Tom Date: Thu, 20 Feb 2025 15:51:02 +0000 Subject: [PATCH] Add fiab docs --- docs/fiab.md | 103 ++++++++++++++++++++++++++++++++ docs/index.md | 1 + fiab/extract.py | 41 ++++++++----- src/python/qubed/Qube.py | 30 +++++++++- src/python/qubed/value_types.py | 6 ++ tests/test_conversions.py | 16 +++++ tests/test_iteration.py | 18 +----- 7 files changed, 179 insertions(+), 36 deletions(-) create mode 100644 docs/fiab.md create mode 100644 tests/test_conversions.py diff --git a/docs/fiab.md b/docs/fiab.md new file mode 100644 index 0000000..bd26297 --- /dev/null +++ b/docs/fiab.md @@ -0,0 +1,103 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.16.4 +--- + +# Fiab + +## Model Selection + +This is a demo of using qubed to select from a set of forecast models that each produce a set of output variables. + +First let's construct some models represented as qubes: + +```{code-cell} python3 +from qubed import Qube +model_1 = Qube.from_datacube({ + "levtype": "pl", + "param" : ["q", "t", "u", "v", "w", "z"], + "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000], + }) | Qube.from_datacube({ + "levtype": "sfc", + "param" : ["10u", "10v", "2d", "2t", "cp", "msl", "skt", "sp", "tcw", "tp"], +}) + +model_1 = "model=1" / ("frequency=6h" / model_1) +model_1 +``` + +This is the most complete model. 
Now let's do one with fewer variables and levels: + +```{code-cell} python3 +model_2 = Qube.from_datacube({ + "levtype": "pl", + "param" : ["q", "t"], + "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000], + }) | Qube.from_datacube({ + "levtype": "sfc", + "param" : ["2t", "cp", "msl"], +}) +model_2 = "model=2" / ("frequency=continuous" / model_2) +``` + +```{code-cell} python3 +model_3 = Qube.from_datacube({ + "levtype": "pl", + "param" : ["q", "t"], + "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000], + }) | Qube.from_datacube({ + "levtype": "sfc", + "param" : ["2t", "cp", "msl"], +}) +model_3 = "model=3" / ("frequency=6h" / model_3) +model_3 +``` + + +Now we can combine the three models into a single qube: + +```{code-cell} python3 +all_models = model_1 | model_2 | model_3 +all_models +``` + +Now we can perform queries over the models. We can get all models that produce 2m temperature: +```{code-cell} python3 +all_models.select({ + "param" : "2t", +}) +``` + +Filter on both parameter and frequency: + +```{code-cell} python3 +all_models.select({ + "param" : "2t", + "frequency": "continuous", +}) +``` + +Find all models that have some overlap with this set of parameters: + +```{code-cell} python3 +all_models.select({ + "param" : ["q", "t", "u", "v"], +}) +``` + +## Choosing a set of models based on the requested parameter set + +```{code-cell} python3 +all_models.select({ + "param" : ["q", "t", "u", "v"], + "frequency": "6h", +}) +``` + + + + diff --git a/docs/index.md b/docs/index.md index 628a5f1..53398e0 100644 --- a/docs/index.md +++ b/docs/index.md @@ -16,6 +16,7 @@ quickstart.md api.md development.md algorithms.md +fiab.md ``` Qubed provides a datastructure called a Qube which represents sets of data identified by multiple key value pairs as a tree of datacubes. To understand what that means go to [Background](background.md), to just start using the library skip straight to the [Quickstart](quickstart.md). 
diff --git a/fiab/extract.py b/fiab/extract.py index 232747d..089a74e 100644 --- a/fiab/extract.py +++ b/fiab/extract.py @@ -1,4 +1,3 @@ - import json from collections import defaultdict @@ -8,26 +7,36 @@ predicted_indices = [*metadata['data_indices']['data']['output']['prognostic'], variables = metadata['dataset']["variables"] variables = [variables[i] for i in predicted_indices] -print('Variables:', variables) +# print('Raw Model Variables:', variables) +# Split variables between pressure and surface surface_variables = [v for v in variables if '_' not in v] -pressure_level_variables = [v for v in variables if '_' in v] -pressure_levels = sorted(set([v.split('_')[-1] for v in pressure_level_variables])) -pressure_level_variables = sorted(set([v.split('_')[0] for v in pressure_level_variables])) - -levels_for_variables = defaultdict(list) +# Collect the levels for each pressure variable +level_variables = defaultdict(list) for v in variables: - if "_" in v: - variable, level = v.split('_') - levels_for_variables[variable].append(level) + if '_' in v: + variable, level = v.split("_") + level_variables[variable].append(int(level)) -print('Levels for variables:', levels_for_variables) +# print(level_variables) -print('Pressure level variables:', pressure_level_variables) -print('Pressure levels:', sorted([int(p) for p in pressure_levels])) +# Use qubed library to construct tree +from qubed import Qube -print('Surface variables:', surface_variables) +model_tree = Qube.empty() -frequency = metadata['config']['data']['frequency'] -print("Frequency:", frequency) \ No newline at end of file +for variable, levels in level_variables.items(): + model_tree = model_tree | Qube.from_datacube({ + "levtype": "pl", + "param" : variable, + "level" : levels, + }) + +for variable in surface_variables: + model_tree = model_tree | Qube.from_datacube({ + "levtype": "sfc", + "param" : variable, + }) + +print(model_tree.to_json()) \ No newline at end of file diff --git 
a/src/python/qubed/Qube.py b/src/python/qubed/Qube.py index 34616a5..b649629 100644 --- a/src/python/qubed/Qube.py +++ b/src/python/qubed/Qube.py @@ -66,11 +66,21 @@ class Qube: return Qube.make( key=json["key"], values=values_from_json(json["values"]), - metadata=json["metadata"] if "metadata" in json else {}, + metadata=frozendict(json["metadata"]) if "metadata" in json else {}, children=(from_json(c) for c in json["children"]), ) return from_json(json) + def to_json(self) -> dict: + def to_json(node: Qube) -> dict: + return { + "key": node.key, + "values": node.values.to_json(), + "metadata": dict(node.metadata), + "children": [to_json(c) for c in node.children] + } + return to_json(self) + @classmethod def from_dict(cls, d: dict) -> 'Qube': def from_dict(d: dict) -> list[Qube]: @@ -102,6 +112,12 @@ class Qube: def _repr_html_(self) -> str: return node_tree_to_html(self, depth = 2, collapse = True) + # Allow "key=value/value" / qube to prepend keys + def __rtruediv__(self, other: str) -> "Qube": + key, values = other.split("=") + values = QEnum((values.split("/"))) + return Qube.root_node([Qube.make(key, values, self.children)]) + def __or__(self, other: "Qube") -> "Qube": return set_operations.operation(self, other, set_operations.SetOperation.UNION, type(self)) @@ -176,7 +192,7 @@ class Qube: return dataclasses.replace(self, children = children) - def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed") -> 'Qube': + def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed", prune=True) -> 'Qube': # make all values lists selection = {k : v if isinstance(v, list) else [v] for k,v in selection.items()} @@ -187,7 +203,15 @@ class Qube: if node.key not in selection: if mode == "strict": return None - return dataclasses.replace(node, children = not_none(select(c) for c in node.children)) + + new_children = not_none(select(c) for c in node.children) + + # If prune is true, 
remove any non-leaf nodes + # which have had all their children removed + if prune and node.children and not new_children: + return None + + return dataclasses.replace(node, children = new_children) # If the key is specified, check if any of the values match values = QEnum((c for c in selection[node.key] if c in node.values)) diff --git a/src/python/qubed/value_types.py b/src/python/qubed/value_types.py index 816cb2a..2985d41 100644 --- a/src/python/qubed/value_types.py +++ b/src/python/qubed/value_types.py @@ -30,6 +30,10 @@ class Values(ABC): def min(self): pass + @abstractmethod + def to_json(self): + pass + T = TypeVar("T") EnumValuesType = FrozenSet[T] @dataclass(frozen=True, order=True) @@ -61,6 +65,8 @@ class QEnum(Values): return [type(self)(tuple(values))] def min(self): return min(self.values) + def to_json(self): + return list(self.values) @dataclass(frozen=True) class Range(Values, ABC): diff --git a/tests/test_conversions.py b/tests/test_conversions.py new file mode 100644 index 0000000..4d96cfd --- /dev/null +++ b/tests/test_conversions.py @@ -0,0 +1,16 @@ +from qubed import Qube + + +def test_json_round_trip(): + u = Qube.from_dict({ + "class=d1" : { + "dataset=climate-dt/weather-dt" : { + "generation=1/2/3/4" : {}, + }, + "dataset=another-value" : { + "generation=1/2/3" : {}, + }, + } + }) + json = u.to_json() + assert Qube.from_json(json) == u \ No newline at end of file diff --git a/tests/test_iteration.py b/tests/test_iteration.py index ae7f881..a997bf6 100644 --- a/tests/test_iteration.py +++ b/tests/test_iteration.py @@ -16,20 +16,4 @@ def test_iter_leaves_simple(): {"a" : '2', "b" : '2'}, ] - assert set(make_hashable(q.leaves())) == set(make_hashable(entries)) - -# def test_iter_leaves(): -# d = { -# "class=od" : { -# "expver=0001": {"param=1":{}, "param=2":{}}, -# "expver=0002": {"param=1":{}, "param=2":{}}, -# }, -# "class=rd" : { -# "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}}, -# "expver=0002": {"param=1":{}, "param=2":{}}, -# 
}, -# } -# q = Qube.from_dict(d) -# r = Qube.from_dict(d) - -# assert q == r \ No newline at end of file + assert set(make_hashable(q.leaves())) == set(make_hashable(entries)) \ No newline at end of file