Add fiab docs

This commit is contained in:
Tom 2025-02-20 15:51:02 +00:00
parent 11516a05ba
commit 4c941d34f8
7 changed files with 179 additions and 36 deletions

103
docs/fiab.md Normal file
View File

@ -0,0 +1,103 @@
---
jupytext:
text_representation:
extension: .md
format_name: myst
format_version: 0.13
jupytext_version: 1.16.4
---
# Fiab
## Model Selection
This is a demo of using qubed to select from a set of forecast models that each produce a set of output variables.
First let's construct some models represented as qubes:
```{code-cell} python3
from qubed import Qube
model_1 = Qube.from_datacube({
"levtype": "pl",
"param" : ["q", "t", "u", "v", "w", "z"],
"level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
}) | Qube.from_datacube({
"levtype": "sfc",
"param" : ["10u", "10v", "2d", "2t", "cp", "msl", "skt", "sp", "tcw", "tp"],
})
model_1 = "model=1" / ("frequency=6h" / model_1)
model_1
```
This is the most complete model. Now let's do one with fewer variables and levels:
```{code-cell} python3
model_2 = Qube.from_datacube({
"levtype": "pl",
"param" : ["q", "t"],
"level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
}) | Qube.from_datacube({
"levtype": "sfc",
"param" : ["2t", "cp", "msl"],
})
model_2 = "model=2" / ("frequency=continuous" / model_2)
```
```{code-cell} python3
model_3 = Qube.from_datacube({
"levtype": "pl",
"param" : ["q", "t"],
"level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
}) | Qube.from_datacube({
"levtype": "sfc",
"param" : ["2t", "cp", "msl"],
})
model_3 = "model=3" / ("frequency=6h" / model_3)
model_3
```
Now we can combine the three models into a single qube:
```{code-cell} python3
all_models = model_1 | model_2 | model_3
all_models
```
Now we can perform queries over the models. We can get all models that produce 2m temperature:
```{code-cell} python3
all_models.select({
"param" : "2t",
})
```
Filter on both parameter and frequency:
```{code-cell} python3
all_models.select({
"param" : "2t",
"frequency": "continuous",
})
```
Find all models that have some overlap with this set of parameters:
```{code-cell} python3
all_models.select({
"param" : ["q", "t", "u", "v"],
})
```
## Choosing a set of models based on the requested parameter set
```{code-cell} python3
all_models.select({
"param" : ["q", "t", "u", "v"],
"frequency": "6h",
})
```
<!-- ## Choosing the fewest models needed to cover the requested parameter set -->
<!-- ```{code-cell} python3 -->

View File

@ -16,6 +16,7 @@ quickstart.md
api.md
development.md
algorithms.md
fiab.md
```
Qubed provides a data structure called a Qube, which represents sets of data identified by multiple key-value pairs as a tree of datacubes. To understand what that means, go to [Background](background.md); to just start using the library, skip straight to the [Quickstart](quickstart.md).

View File

@ -1,4 +1,3 @@
import json
from collections import defaultdict
@ -8,26 +7,36 @@ predicted_indices = [*metadata['data_indices']['data']['output']['prognostic'],
variables = metadata['dataset']["variables"]
variables = [variables[i] for i in predicted_indices]
print('Variables:', variables)
# print('Raw Model Variables:', variables)
# Split variables between pressure and surface
surface_variables = [v for v in variables if '_' not in v]
pressure_level_variables = [v for v in variables if '_' in v]
pressure_levels = sorted(set([v.split('_')[-1] for v in pressure_level_variables]))
pressure_level_variables = sorted(set([v.split('_')[0] for v in pressure_level_variables]))
levels_for_variables = defaultdict(list)
# Collect the levels for each pressure variable
level_variables = defaultdict(list)
for v in variables:
if "_" in v:
variable, level = v.split('_')
levels_for_variables[variable].append(level)
if '_' in v:
variable, level = v.split("_")
level_variables[variable].append(int(level))
print('Levels for variables:', levels_for_variables)
# print(level_variables)
print('Pressure level variables:', pressure_level_variables)
print('Pressure levels:', sorted([int(p) for p in pressure_levels]))
# Use the qubed library to construct the tree
from qubed import Qube
print('Surface variables:', surface_variables)
model_tree = Qube.empty()
frequency = metadata['config']['data']['frequency']
print("Frequency:", frequency)
for variable, levels in level_variables.items():
model_tree = model_tree | Qube.from_datacube({
"levtype": "pl",
"param" : variable,
"level" : levels,
})
for variable in surface_variables:
model_tree = model_tree | Qube.from_datacube({
"levtype": "sfc",
"param" : variable,
})
print(model_tree.to_json())

View File

@ -66,11 +66,21 @@ class Qube:
return Qube.make(
key=json["key"],
values=values_from_json(json["values"]),
metadata=json["metadata"] if "metadata" in json else {},
metadata=frozendict(json["metadata"]) if "metadata" in json else {},
children=(from_json(c) for c in json["children"]),
)
return from_json(json)
def to_json(self) -> dict:
    """Convert this qube, and recursively all of its children, to a dict.

    Every node becomes a dict with the entries "key", "values", "metadata"
    and "children", where "children" is a list of dicts of the same shape.
    """
    def encode(qube: Qube) -> dict:
        # Entries are added in the same order as the serialised layout:
        # key, values, metadata, children.
        encoded = {"key": qube.key}
        encoded["values"] = qube.values.to_json()
        # Copy the metadata mapping into a plain dict.
        encoded["metadata"] = dict(qube.metadata)
        encoded["children"] = list(map(encode, qube.children))
        return encoded

    return encode(self)
@classmethod
def from_dict(cls, d: dict) -> 'Qube':
def from_dict(d: dict) -> list[Qube]:
@ -102,6 +112,12 @@ class Qube:
def _repr_html_(self) -> str:
return node_tree_to_html(self, depth = 2, collapse = True)
# Allow "key=value/value" / qube to prepend keys
def __rtruediv__(self, other: str) -> "Qube":
    """Prepend a key to this qube via ``"key=v1/v2" / qube``.

    ``other`` is split at its first ``=`` into a key and a ``/``-separated
    list of values. A node with that key and those values is made with
    this qube's children, and passed to ``Qube.root_node`` as the only
    child.

    Args:
        other: A string of the form ``"key=value"`` or
            ``"key=value1/value2/..."``.

    Returns:
        The qube produced by ``Qube.root_node``, or ``NotImplemented``
        when ``other`` is not a string.

    Raises:
        ValueError: If ``other`` does not contain ``=``.
    """
    # Follow the reflected-operator protocol: for an unsupported left
    # operand, let Python raise its standard TypeError.
    if not isinstance(other, str):
        return NotImplemented

    # Split on the first "=" only, so that values may contain "=" and a
    # missing "=" gives a clear message rather than an unpacking error.
    key, separator, raw_values = other.partition("=")
    if not separator:
        raise ValueError(
            f"Expected a string of the form 'key=value[/value...]', got {other!r}"
        )

    values = QEnum(raw_values.split("/"))
    return Qube.root_node([Qube.make(key, values, self.children)])
def __or__(self, other: "Qube") -> "Qube":
return set_operations.operation(self, other, set_operations.SetOperation.UNION, type(self))
@ -176,7 +192,7 @@ class Qube:
return dataclasses.replace(self, children = children)
def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed") -> 'Qube':
def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed", prune=True) -> 'Qube':
# make all values lists
selection = {k : v if isinstance(v, list) else [v] for k,v in selection.items()}
@ -187,7 +203,15 @@ class Qube:
if node.key not in selection:
if mode == "strict":
return None
return dataclasses.replace(node, children = not_none(select(c) for c in node.children))
new_children = not_none(select(c) for c in node.children)
# If prune is True, remove any non-leaf nodes
# which have had all their children removed
if prune and node.children and not new_children:
return None
return dataclasses.replace(node, children = new_children)
# If the key is specified, check if any of the values match
values = QEnum((c for c in selection[node.key] if c in node.values))

View File

@ -30,6 +30,10 @@ class Values(ABC):
def min(self):
pass
@abstractmethod
def to_json(self):
    """Return a representation of these values for serialisation.

    Abstract: each concrete subclass must provide its own implementation.
    """
T = TypeVar("T")
EnumValuesType = FrozenSet[T]
@dataclass(frozen=True, order=True)
@ -61,6 +65,8 @@ class QEnum(Values):
return [type(self)(tuple(values))]
def min(self):
return min(self.values)
def to_json(self):
    """Return the values held by this enum as a new list."""
    return [*self.values]
@dataclass(frozen=True)
class Range(Values, ABC):

16
tests/test_conversions.py Normal file
View File

@ -0,0 +1,16 @@
from qubed import Qube
def test_json_round_trip():
    """A qube rebuilt by from_json from its to_json output equals the original."""
    tree = {
        "class=d1": {
            "dataset=climate-dt/weather-dt": {
                "generation=1/2/3/4": {},
            },
            "dataset=another-value": {
                "generation=1/2/3": {},
            },
        }
    }
    original = Qube.from_dict(tree)
    restored = Qube.from_json(original.to_json())
    assert restored == original

View File

@ -16,20 +16,4 @@ def test_iter_leaves_simple():
{"a" : '2', "b" : '2'},
]
assert set(make_hashable(q.leaves())) == set(make_hashable(entries))
# def test_iter_leaves():
# d = {
# "class=od" : {
# "expver=0001": {"param=1":{}, "param=2":{}},
# "expver=0002": {"param=1":{}, "param=2":{}},
# },
# "class=rd" : {
# "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
# "expver=0002": {"param=1":{}, "param=2":{}},
# },
# }
# q = Qube.from_dict(d)
# r = Qube.from_dict(d)
# assert q == r
assert set(make_hashable(q.leaves())) == set(make_hashable(entries))