Merge branch 'fiab'
This commit is contained in:
commit
ecccf336b4
103
docs/fiab.md
Normal file
103
docs/fiab.md
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
---
|
||||||
|
jupytext:
|
||||||
|
text_representation:
|
||||||
|
extension: .md
|
||||||
|
format_name: myst
|
||||||
|
format_version: 0.13
|
||||||
|
jupytext_version: 1.16.4
|
||||||
|
---
|
||||||
|
|
||||||
|
# Fiab
|
||||||
|
|
||||||
|
## Model Selection
|
||||||
|
|
||||||
|
This is a demo of using qubed to select from a set of forecast models that each produce a set of output variables.
|
||||||
|
|
||||||
|
First let's construct some models represented as qubes:
|
||||||
|
|
||||||
|
```{code-cell} python3
|
||||||
|
from qubed import Qube
|
||||||
|
model_1 = Qube.from_datacube({
|
||||||
|
"levtype": "pl",
|
||||||
|
"param" : ["q", "t", "u", "v", "w", "z"],
|
||||||
|
"level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
|
||||||
|
}) | Qube.from_datacube({
|
||||||
|
"levtype": "sfc",
|
||||||
|
"param" : ["10u", "10v", "2d", "2t", "cp", "msl", "skt", "sp", "tcw", "tp"],
|
||||||
|
})
|
||||||
|
|
||||||
|
model_1 = "model=1" / ("frequency=6h" / model_1)
|
||||||
|
model_1
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the most complete model. Now let's do one with fewer variables and levels:
|
||||||
|
|
||||||
|
```{code-cell} python3
|
||||||
|
model_2 = Qube.from_datacube({
|
||||||
|
"levtype": "pl",
|
||||||
|
"param" : ["q", "t"],
|
||||||
|
"level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
|
||||||
|
}) | Qube.from_datacube({
|
||||||
|
"levtype": "sfc",
|
||||||
|
"param" : ["2t", "cp", "msl"],
|
||||||
|
})
|
||||||
|
model_2 = "model=2" / ("frequency=continuous" / model_2)
|
||||||
|
```
|
||||||
|
|
||||||
|
```{code-cell} python3
|
||||||
|
model_3 = Qube.from_datacube({
|
||||||
|
"levtype": "pl",
|
||||||
|
"param" : ["q", "t"],
|
||||||
|
"level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
|
||||||
|
}) | Qube.from_datacube({
|
||||||
|
"levtype": "sfc",
|
||||||
|
"param" : ["2t", "cp", "msl"],
|
||||||
|
})
|
||||||
|
model_3 = "model=3" / ("frequency=6h" / model_3)
|
||||||
|
model_3
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Now we can combine the three models into a single qube:
|
||||||
|
|
||||||
|
```{code-cell} python3
|
||||||
|
all_models = model_1 | model_2 | model_3
|
||||||
|
all_models
|
||||||
|
```
|
||||||
|
|
||||||
|
Now we can perform queries over the models. We can get all models that produce 2m temperature:
|
||||||
|
```{code-cell} python3
|
||||||
|
all_models.select({
|
||||||
|
"param" : "2t",
|
||||||
|
})
|
||||||
|
```
|
||||||
|
|
||||||
|
Filter on both parameter and frequency:
|
||||||
|
|
||||||
|
```{code-cell} python3
|
||||||
|
all_models.select({
|
||||||
|
"param" : "2t",
|
||||||
|
"frequency": "continuous",
|
||||||
|
})
|
||||||
|
```
|
||||||
|
|
||||||
|
Find all models that have some overlap with this set of parameters:
|
||||||
|
|
||||||
|
```{code-cell} python3
|
||||||
|
all_models.select({
|
||||||
|
"param" : ["q", "t", "u", "v"],
|
||||||
|
})
|
||||||
|
```
|
||||||
|
|
||||||
|
## Choosing a set of models based on the requested parameter set
|
||||||
|
|
||||||
|
```{code-cell} python3
|
||||||
|
all_models.select({
|
||||||
|
"param" : ["q", "t", "u", "v"],
|
||||||
|
"frequency": "6h",
|
||||||
|
})
|
||||||
|
```
|
||||||
|
|
||||||
|
<!-- ## Choosing the fewest models needed to cover the requested parameter set -->
|
||||||
|
|
||||||
|
<!-- ```{code-cell} python3 -->
|
@ -16,6 +16,7 @@ quickstart.md
|
|||||||
api.md
|
api.md
|
||||||
development.md
|
development.md
|
||||||
algorithms.md
|
algorithms.md
|
||||||
|
fiab.md
|
||||||
```
|
```
|
||||||
|
|
||||||
Qubed provides a data structure called a Qube, which represents sets of data identified by multiple key-value pairs as a tree of datacubes. To understand what that means, go to [Background](background.md); to just start using the library, skip straight to the [Quickstart](quickstart.md).
|
Qubed provides a data structure called a Qube, which represents sets of data identified by multiple key-value pairs as a tree of datacubes. To understand what that means, go to [Background](background.md); to just start using the library, skip straight to the [Quickstart](quickstart.md).
|
||||||
|
1
fiab/.gitignore
vendored
Normal file
1
fiab/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
!*.json
|
37
fiab/example_products.md
Normal file
37
fiab/example_products.md
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Simplest possible product
|
||||||
|
- one field: 2 metre temperature
|
||||||
|
- all models that output param=2t would work
|
||||||
|
- may also have a lead time range specified from
|
||||||
|
|
||||||
|
So we could say "here are all the models with param=2t with lead times in the specified interval"
|
||||||
|
|
||||||
|
quantiles
|
||||||
|
param:
|
||||||
|
float range from 0 - 100
|
||||||
|
|
||||||
|
threshold:
|
||||||
|
"give me 2 metre temperature values that are above this threshold"
|
||||||
|
|
||||||
|
|
||||||
|
product requirements can be specified as a set of:
|
||||||
|
params: one or more params
|
||||||
|
levels: one or more or all
|
||||||
|
time:
|
||||||
|
- product could be specific to a particular time
|
||||||
|
- could require at least a month's worth of data
|
||||||
|
|
||||||
|
|
||||||
|
make some fake models that have:
|
||||||
|
- fewer params
|
||||||
|
- continuous times vs steps of 6 hours
|
||||||
|
-
|
||||||
|
|
||||||
|
|
||||||
|
Could also represent what data is currently cached on disk, and then be able to tell the user what they can generate really fast.
|
||||||
|
|
||||||
|
API want:
|
||||||
|
- way to get axis span like what params exist
|
||||||
|
-
|
42
fiab/extract.py
Normal file
42
fiab/extract.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
"""Turn raw anemoi model metadata into a qubed tree and print it.

Reads ``raw_anemoi_metadata.json`` from the current working directory,
works out which variables the model predicts, splits them into pressure-level
and surface variables, and builds a ``Qube`` describing them.
"""
import json
from collections import defaultdict

from qubed import Qube

# Use a context manager so the file handle is closed as soon as the metadata
# is parsed; JSON is UTF-8, so do not depend on the locale's default encoding.
with open("raw_anemoi_metadata.json", encoding="utf-8") as metadata_file:
    metadata = json.load(metadata_file)

# The predicted variables are the prognostic outputs followed by the
# diagnostic outputs, both given as indices into the dataset variable list.
output_indices = metadata['data_indices']['data']['output']
predicted_indices = [*output_indices['prognostic'], *output_indices['diagnostic']]
variables = metadata['dataset']["variables"]
variables = [variables[i] for i in predicted_indices]

# print('Raw Model Variables:', variables)

# Split variables between pressure and surface:
# names without an underscore are treated as surface variables.
surface_variables = [v for v in variables if '_' not in v]

# Collect the levels for each pressure variable (names of the form
# "<variable>_<level>").
level_variables = defaultdict(list)
for v in variables:
    if '_' in v:
        # Split only on the last underscore so a name containing more than
        # one underscore no longer fails to unpack into two parts.
        variable, level = v.rsplit("_", 1)
        level_variables[variable].append(int(level))

# print(level_variables)

# Use the qubed library to construct the tree
model_tree = Qube.empty()

for variable, levels in level_variables.items():
    model_tree = model_tree | Qube.from_datacube({
        "levtype": "pl",
        "param": variable,
        "level": levels,
    })

for variable in surface_variables:
    model_tree = model_tree | Qube.from_datacube({
        "levtype": "sfc",
        "param": variable,
    })

print(model_tree.to_json())
|
1
fiab/raw_anemoi_metadata.json
Normal file
1
fiab/raw_anemoi_metadata.json
Normal file
File diff suppressed because one or more lines are too long
67
fiab/structure.yaml
Normal file
67
fiab/structure.yaml
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
# Format: list of models, each model has a model_outputs field which contains a nested tree of nodes
|
||||||
|
# Nodes have {node: name, cube: list of key value(s) pairs, children: list[nodes]}
|
||||||
|
|
||||||
|
- model: surface_and_atmosphere_model
|
||||||
|
model_outputs:
|
||||||
|
- node: root
|
||||||
|
cube:
|
||||||
|
class: rd
|
||||||
|
stream: anemoi
|
||||||
|
expver: something
|
||||||
|
lead_time:
|
||||||
|
type: datetime
|
||||||
|
format: '%Y-%m-%d %H:%M:%S'
|
||||||
|
step: 6h
|
||||||
|
|
||||||
|
|
||||||
|
children:
|
||||||
|
- node: pressure_variables
|
||||||
|
other_metadata: something
|
||||||
|
cube:
|
||||||
|
param: ['q', 't', 'u', 'v', 'w', 'z']
|
||||||
|
level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]
|
||||||
|
|
||||||
|
|
||||||
|
- node: surface_variables
|
||||||
|
other_metadata: something
|
||||||
|
cube:
|
||||||
|
param: ['sp', 'msl', '10u', '10v', '2t', '2d', 'skt', 'tcw', 'cp', 'tp']
|
||||||
|
|
||||||
|
# Hypothetical Ocean variables
|
||||||
|
- node: ocean_variables
|
||||||
|
cube:
|
||||||
|
param: ["saltiness", "number of eels", "is_blue", "good_for_surfing"]
|
||||||
|
ocean_levels: [??, ??]
|
||||||
|
|
||||||
|
# Alternative List of cubes format
|
||||||
|
- model: surface_and_atmosphere_model
|
||||||
|
model_outputs:
|
||||||
|
- node: root
|
||||||
|
cube:
|
||||||
|
class: rd
|
||||||
|
stream: anemoi
|
||||||
|
expver: something
|
||||||
|
lead_time:
|
||||||
|
type: datetime
|
||||||
|
format: '%Y-%m-%d %H:%M:%S'
|
||||||
|
step: 6h
|
||||||
|
|
||||||
|
|
||||||
|
children:
|
||||||
|
- node: pressure_variables
|
||||||
|
other_metadata: something
|
||||||
|
cube:
|
||||||
|
param: ['q', 't', 'u', 'v', 'w', 'z']
|
||||||
|
level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]
|
||||||
|
|
||||||
|
|
||||||
|
- node: surface_variables
|
||||||
|
other_metadata: something
|
||||||
|
cube:
|
||||||
|
param: ['sp', 'msl', '10u', '10v', '2t', '2d', 'skt', 'tcw', 'cp', 'tp']
|
||||||
|
|
||||||
|
# Hypothetical Ocean variables
|
||||||
|
- node: ocean_variables
|
||||||
|
cube:
|
||||||
|
param: ["saltiness", "number of eels", "is_blue", "good_for_surfing"]
|
||||||
|
ocean_levels: [??, ??]
|
@ -66,11 +66,21 @@ class Qube:
|
|||||||
return Qube.make(
|
return Qube.make(
|
||||||
key=json["key"],
|
key=json["key"],
|
||||||
values=values_from_json(json["values"]),
|
values=values_from_json(json["values"]),
|
||||||
metadata=json["metadata"] if "metadata" in json else {},
|
metadata=frozendict(json["metadata"]) if "metadata" in json else {},
|
||||||
children=(from_json(c) for c in json["children"]),
|
children=(from_json(c) for c in json["children"]),
|
||||||
)
|
)
|
||||||
return from_json(json)
|
return from_json(json)
|
||||||
|
|
||||||
|
def to_json(self) -> dict:
    """Serialise this qube and all of its descendants into nested dicts.

    Each node becomes a dict with the entries ``"key"``, ``"values"`` (the
    result of the node's ``values.to_json()``), ``"metadata"`` (copied into a
    plain ``dict``) and ``"children"`` (a list of serialised child nodes).
    """
    def serialise(node: "Qube") -> dict:
        # Fill the entries in the same order as they appear in the output.
        payload = {
            "key": node.key,
            "values": node.values.to_json(),
            "metadata": dict(node.metadata),
        }
        payload["children"] = [serialise(child) for child in node.children]
        return payload

    return serialise(self)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_dict(cls, d: dict) -> 'Qube':
|
def from_dict(cls, d: dict) -> 'Qube':
|
||||||
def from_dict(d: dict) -> list[Qube]:
|
def from_dict(d: dict) -> list[Qube]:
|
||||||
@ -102,6 +112,12 @@ class Qube:
|
|||||||
def _repr_html_(self) -> str:
|
def _repr_html_(self) -> str:
|
||||||
return node_tree_to_html(self, depth = 2, collapse = True)
|
return node_tree_to_html(self, depth = 2, collapse = True)
|
||||||
|
|
||||||
|
# Allow "key=value/value" / qube to prepend keys
|
||||||
|
def __rtruediv__(self, other: str) -> "Qube":
    """Handle ``"key=v1/v2" / qube``.

    The string on the left-hand side is split at ``=`` into a key and a
    ``/``-separated list of values. The result is a root node holding a
    single node built from that key and those values, with this qube's
    children attached beneath it.
    """
    key, raw_values = other.split("=")
    prepended = Qube.make(key, QEnum(raw_values.split("/")), self.children)
    return Qube.root_node([prepended])
|
||||||
|
|
||||||
def __or__(self, other: "Qube") -> "Qube":
|
def __or__(self, other: "Qube") -> "Qube":
|
||||||
return set_operations.operation(self, other, set_operations.SetOperation.UNION, type(self))
|
return set_operations.operation(self, other, set_operations.SetOperation.UNION, type(self))
|
||||||
|
|
||||||
@ -176,7 +192,7 @@ class Qube:
|
|||||||
return dataclasses.replace(self, children = children)
|
return dataclasses.replace(self, children = children)
|
||||||
|
|
||||||
|
|
||||||
def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed") -> 'Qube':
|
def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed", prune=True) -> 'Qube':
|
||||||
# make all values lists
|
# make all values lists
|
||||||
selection = {k : v if isinstance(v, list) else [v] for k,v in selection.items()}
|
selection = {k : v if isinstance(v, list) else [v] for k,v in selection.items()}
|
||||||
|
|
||||||
@ -187,7 +203,15 @@ class Qube:
|
|||||||
if node.key not in selection:
|
if node.key not in selection:
|
||||||
if mode == "strict":
|
if mode == "strict":
|
||||||
return None
|
return None
|
||||||
return dataclasses.replace(node, children = not_none(select(c) for c in node.children))
|
|
||||||
|
new_children = not_none(select(c) for c in node.children)
|
||||||
|
|
||||||
|
# If prune is True, remove any non-leaf nodes
|
||||||
|
# which have had all their children removed
|
||||||
|
if prune and node.children and not new_children:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return dataclasses.replace(node, children = new_children)
|
||||||
|
|
||||||
# If the key is specified, check if any of the values match
|
# If the key is specified, check if any of the values match
|
||||||
values = QEnum((c for c in selection[node.key] if c in node.values))
|
values = QEnum((c for c in selection[node.key] if c in node.values))
|
||||||
|
@ -30,6 +30,10 @@ class Values(ABC):
|
|||||||
def min(self):
|
def min(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def to_json(self):
|
||||||
|
pass
|
||||||
|
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
EnumValuesType = FrozenSet[T]
|
EnumValuesType = FrozenSet[T]
|
||||||
@dataclass(frozen=True, order=True)
|
@dataclass(frozen=True, order=True)
|
||||||
@ -61,6 +65,8 @@ class QEnum(Values):
|
|||||||
return [type(self)(tuple(values))]
|
return [type(self)(tuple(values))]
|
||||||
def min(self):
|
def min(self):
|
||||||
return min(self.values)
|
return min(self.values)
|
||||||
|
def to_json(self):
    """Return the enumerated values as a plain list."""
    return [*self.values]
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class Range(Values, ABC):
|
class Range(Values, ABC):
|
||||||
|
16
tests/test_conversions.py
Normal file
16
tests/test_conversions.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
from qubed import Qube
|
||||||
|
|
||||||
|
|
||||||
|
def test_json_round_trip():
    """A qube written with to_json must be rebuilt unchanged by from_json."""
    tree = {
        "class=d1": {
            "dataset=climate-dt/weather-dt": {
                "generation=1/2/3/4": {},
            },
            "dataset=another-value": {
                "generation=1/2/3": {},
            },
        }
    }
    original = Qube.from_dict(tree)
    restored = Qube.from_json(original.to_json())
    assert restored == original
|
@ -17,19 +17,3 @@ def test_iter_leaves_simple():
|
|||||||
]
|
]
|
||||||
|
|
||||||
assert set(make_hashable(q.leaves())) == set(make_hashable(entries))
|
assert set(make_hashable(q.leaves())) == set(make_hashable(entries))
|
||||||
|
|
||||||
# def test_iter_leaves():
|
|
||||||
# d = {
|
|
||||||
# "class=od" : {
|
|
||||||
# "expver=0001": {"param=1":{}, "param=2":{}},
|
|
||||||
# "expver=0002": {"param=1":{}, "param=2":{}},
|
|
||||||
# },
|
|
||||||
# "class=rd" : {
|
|
||||||
# "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
|
|
||||||
# "expver=0002": {"param=1":{}, "param=2":{}},
|
|
||||||
# },
|
|
||||||
# }
|
|
||||||
# q = Qube.from_dict(d)
|
|
||||||
# r = Qube.from_dict(d)
|
|
||||||
|
|
||||||
# assert q == r
|
|
Loading…
x
Reference in New Issue
Block a user