diff --git a/docs/fiab.md b/docs/fiab.md new file mode 100644 index 0000000..bd26297 --- /dev/null +++ b/docs/fiab.md @@ -0,0 +1,103 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.16.4 +--- + +# Fiab + +## Model Selection + +This is a demo of using qubed to select from a set of forecast models that each produce a set of output variables. + +First let's construct some models represented as qubes: + +```{code-cell} python3 +from qubed import Qube +model_1 = Qube.from_datacube({ + "levtype": "pl", + "param" : ["q", "t", "u", "v", "w", "z"], + "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000], + }) | Qube.from_datacube({ + "levtype": "sfc", + "param" : ["10u", "10v", "2d", "2t", "cp", "msl", "skt", "sp", "tcw", "tp"], +}) + +model_1 = "model=1" / ("frequency=6h" / model_1) +model_1 +``` + +This is the most complete model. Now let's do one with fewer variables and a continuous output frequency (the levels are unchanged): + +```{code-cell} python3 +model_2 = Qube.from_datacube({ + "levtype": "pl", + "param" : ["q", "t"], + "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000], + }) | Qube.from_datacube({ + "levtype": "sfc", + "param" : ["2t", "cp", "msl"], +}) +model_2 = "model=2" / ("frequency=continuous" / model_2) +``` + +```{code-cell} python3 +model_3 = Qube.from_datacube({ + "levtype": "pl", + "param" : ["q", "t"], + "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000], + }) | Qube.from_datacube({ + "levtype": "sfc", + "param" : ["2t", "cp", "msl"], +}) +model_3 = "model=3" / ("frequency=6h" / model_3) +model_3 +``` + + +Now we can combine the three models into a single qube: + +```{code-cell} python3 +all_models = model_1 | model_2 | model_3 +all_models +``` + +Now we can perform queries over the models. 
We can get all models that produce 2m temperature: +```{code-cell} python3 +all_models.select({ + "param" : "2t", +}) +``` + +Filter on both parameter and frequency: + +```{code-cell} python3 +all_models.select({ + "param" : "2t", + "frequency": "continuous", +}) +``` + +Find all models that have some overlap with this set of parameters: + +```{code-cell} python3 +all_models.select({ + "param" : ["q", "t", "u", "v"], +}) +``` + +## Choosing a set of models based on the requested parameter set + +```{code-cell} python3 +all_models.select({ + "param" : ["q", "t", "u", "v"], + "frequency": "6h", +}) +``` + + + + diff --git a/docs/index.md b/docs/index.md index 628a5f1..53398e0 100644 --- a/docs/index.md +++ b/docs/index.md @@ -16,6 +16,7 @@ quickstart.md api.md development.md algorithms.md +fiab.md ``` Qubed provides a datastructure called a Qube which represents sets of data identified by multiple key value pairs as a tree of datacubes. To understand what that means go to [Background](background.md), to just start using the library skip straight to the [Quickstart](quickstart.md). 
diff --git a/fiab/.gitignore b/fiab/.gitignore new file mode 100644 index 0000000..b599028 --- /dev/null +++ b/fiab/.gitignore @@ -0,0 +1 @@ +!*.json \ No newline at end of file diff --git a/fiab/example_products.md b/fiab/example_products.md new file mode 100644 index 0000000..c189610 --- /dev/null +++ b/fiab/example_products.md @@ -0,0 +1,37 @@ + + + +Simplest possible product +- one field: 2 metre temperature +- all models that output param=2t would work +- may also have a lead time range specified from + +So we could say "here are all the models with param=2t with lead times in the specified interval" + +quantiles + param: + float range from 0 - 100 + +threshold: + "give me 2 metre temperature values that are above this threshold" + + + product requirements can be specified as a set of: + params: one or more params + levels: one or more or all + time: + - product could be specific to a particular time + - could require at least a month's worth of data + + +make some fake models that have: + - fewer params + - continuous times vs steps of 6 hours + - + + +Could also represent what data is currently cached on disk and be able to then tell the user what stuff they can generate really fast. 
+
+API want:
+ - way to get axis span like what params exist
+ - 
diff --git a/fiab/extract.py b/fiab/extract.py
new file mode 100644
index 0000000..089a74e
--- /dev/null
+++ b/fiab/extract.py
@@ -0,0 +1,48 @@
+"""Print a qubed tree (as JSON) of the variables an anemoi model outputs, read from its metadata."""
+
+import json
+from collections import defaultdict
+
+from qubed import Qube
+
+# Use a context manager so the file handle is closed as soon as parsing finishes
+with open("raw_anemoi_metadata.json", encoding="utf-8") as f:
+    metadata = json.load(f)
+
+# Indices of the variables the model outputs: prognostic first, then diagnostic
+output_indices = metadata['data_indices']['data']['output']
+predicted_indices = [*output_indices['prognostic'], *output_indices['diagnostic']]
+variables = metadata['dataset']["variables"]
+variables = [variables[i] for i in predicted_indices]
+
+# print('Raw Model Variables:', variables)
+
+# Split variables between pressure and surface: names without '_' are surface
+surface_variables = [v for v in variables if '_' not in v]
+
+# Collect the levels for each pressure variable, named "<param>_<level>"
+level_variables = defaultdict(list)
+for v in variables:
+    if '_' in v:
+        variable, level = v.split("_")
+        level_variables[variable].append(int(level))
+
+# print(level_variables)
+
+# Use the qubed library to construct the tree
+model_tree = Qube.empty()
+
+for variable, levels in level_variables.items():
+    model_tree = model_tree | Qube.from_datacube({
+        "levtype": "pl",
+        "param" : variable,
+        "level" : levels,
+    })
+
+for variable in surface_variables:
+    model_tree = model_tree | Qube.from_datacube({
+        "levtype": "sfc",
+        "param" : variable,
+    })
+
+print(model_tree.to_json())
\ No newline at end of file
diff --git a/fiab/raw_anemoi_metadata.json b/fiab/raw_anemoi_metadata.json
new file mode 100644
index 0000000..fe88873
--- /dev/null
+++ b/fiab/raw_anemoi_metadata.json
@@ -0,0 +1 @@
+{"version": "1.0", "config": {"data": {"format": "zarr", "resolution": "n320", "frequency": "6h", "timestep": "6h", "forcing": ["cos_latitude", "cos_longitude", "sin_latitude", "sin_longitude", "cos_julian_day", "cos_local_time", "sin_julian_day", "sin_local_time", "insolation", "lsm", "sdor", "slor", "z"], "diagnostic": ["tp", "cp"], "normalizer": {"default": "mean-std", "min-max": null, "max": ["sdor", 
"slor", "z"], "none": ["cos_latitude", "cos_longitude", "sin_latitude", "sin_longitude", "cos_julian_day", "cos_local_time", "sin_julian_day", "sin_local_time", "insolation", "lsm"]}, "imputer": {"default": "none"}, "processors": {"normalizer": {"_target_": "anemoi.models.preprocessing.normalizer.InputNormalizer", "_convert_": "all", "config": {"default": "mean-std", "min-max": null, "max": ["sdor", "slor", "z"], "none": ["cos_latitude", "cos_longitude", "sin_latitude", "sin_longitude", "cos_julian_day", "cos_local_time", "sin_julian_day", "sin_local_time", "insolation", "lsm"]}}}, "num_features": 101}, "dataloader": {"prefetch_factor": 2, "num_workers": {"training": 2, "validation": 2, "test": 2, "predict": 2}, "batch_size": {"training": 1, "validation": 1, "test": 1, "predict": 1}, "limit_batches": {"training": 1, "validation": 1, "test": 20, "predict": 20}, "dataset": "ai-ml/datasets/stable//aifs-od-an-oper-0001-mars-n320-2019-2021-6h-v4.zarr", "training": {"dataset": "ai-ml/datasets/stable//aifs-od-an-oper-0001-mars-n320-2019-2021-6h-v4.zarr", "start": null, "end": 2020, "frequency": "6h", "drop": ["sd"]}, "validation": {"dataset": "ai-ml/datasets/stable//aifs-od-an-oper-0001-mars-n320-2019-2021-6h-v4.zarr", "start": 2021, "end": 2021, "frequency": "6h", "drop": ["sd"]}, "test": {"dataset": "ai-ml/datasets/stable//aifs-od-an-oper-0001-mars-n320-2019-2021-6h-v4.zarr", "start": 2022, "end": null, "frequency": "6h"}}, "model": {"activation": "GELU", "num_channels": 1024, "processor": {"_target_": "anemoi.models.layers.processor.TransformerProcessor", "_convert_": "all", "activation": "GELU", "num_layers": 16, "num_chunks": 2, "mlp_hidden_ratio": 4, "num_heads": 16, "window_size": 1120, "dropout_p": 0.0}, "encoder": {"_target_": "anemoi.models.layers.mapper.GraphTransformerForwardMapper", "_convert_": "all", "trainable_size": 8, "sub_graph_edge_attributes": ["edge_length", "edge_dirs"], "activation": "GELU", "num_chunks": 1, "mlp_hidden_ratio": 4, "num_heads": 16}, 
"decoder": {"_target_": "anemoi.models.layers.mapper.GraphTransformerBackwardMapper", "_convert_": "all", "trainable_size": 8, "sub_graph_edge_attributes": ["edge_length", "edge_dirs"], "activation": "GELU", "num_chunks": 1, "mlp_hidden_ratio": 4, "num_heads": 16}, "trainable_parameters": {"data": 8, "hidden": 8, "data2hidden": 8, "hidden2data": 8}, "attributes": {"edges": ["edge_length", "edge_dirs"], "nodes": []}, "node_loss_weight": "area_weights"}, "training": {"run_id": "e6d5284b-2e07-43ab-abec-03cbe13e3dee", "fork_run_id": null, "load_weights_only": null, "deterministic": false, "precision": "16-mixed", "multistep_input": 2, "accum_grad_batches": 1, "gradient_clip": {"val": 32.0, "algorithm": "value"}, "swa": {"enabled": false, "lr": 0.0001}, "zero_optimizer": false, "loss_gradient_scaling": false, "rollout": {"start": 1, "epoch_increment": 0, "max": 1}, "max_epochs": 1, "lr": {"rate": 6.25e-05, "iterations": 300000, "min": 3e-07}, "loss_scaling": {"default": 1, "pl": {"q": 0.6, "t": 6, "u": 0.8, "v": 0.5, "w": 0.001, "z": 12}, "sfc": {"sp": 10, "10u": 0.1, "10v": 0.1, "2d": 0.5, "tp": 0.025, "cp": 0.0025}}, "metrics": ["z_500", "t_850", "u_850", "v_850"], "pressure_level_scaler": {"_target_": "anemoi.training.data.scaling.ReluPressureLevelScaler", "minimum": 0.2, "slope": 0.001}}, "graph": {"overwrite": false, "data": "era", "hidden": "h", "nodes": {"data": {"node_builder": {"_target_": "anemoi.graphs.nodes.ZarrDatasetNodes", "dataset": "ai-ml/datasets/stable//aifs-od-an-oper-0001-mars-n320-2019-2021-6h-v4.zarr"}, "attributes": {"area_weight": {"_target_": "anemoi.graphs.nodes.attributes.AreaWeights", "norm": "unit-max"}}}, "hidden": {"node_builder": {"_target_": "anemoi.graphs.nodes.NPZFileNodes", "grid_definition_path": "ai-ml/grids/", "resolution": "o96"}}}, "edges": [{"source_name": "era", "target_name": "h", "edge_builder": {"_target_": "anemoi.graphs.edges.CutOffEdges", "cutoff_factor": 0.6}, "attributes": {"edge_length": {"_target_": 
"anemoi.graphs.edges.attributes.EdgeLength", "norm": "unit-std"}, "edge_dirs": {"_target_": "anemoi.graphs.edges.attributes.EdgeDirection", "norm": "unit-std"}}}, {"source_name": "h", "target_name": "era", "edge_builder": {"_target_": "anemoi.graphs.edges.KNNEdges", "num_nearest_neighbours": 3}, "attributes": {"edge_length": {"_target_": "anemoi.graphs.edges.attributes.EdgeLength", "norm": "unit-std"}, "edge_dirs": {"_target_": "anemoi.graphs.edges.attributes.EdgeDirection", "norm": "unit-std"}}}], "attributes": {"nodes": {"area_weight": {"_target_": "anemoi.graphs.nodes.attributes.AreaWeights", "norm": "unit-max"}}, "edges": {"edge_length": {"_target_": "anemoi.graphs.edges.attributes.EdgeLength", "norm": "unit-std"}, "edge_dirs": {"_target_": "anemoi.graphs.edges.attributes.EdgeDirection", "norm": "unit-std"}}}}, "diagnostics": {"eval": {"enabled": false, "rollout": 12, "frequency": 20}, "plot": {"enabled": false, "asynchronous": true, "frequency": 750, "sample_idx": 0, "per_sample": 6, "parameters": ["z_500", "t_850", "u_850", "v_850", "2t", "10u", "10v", "sp", "tp", "cp"], "accumulation_levels_plot": [0, 0.05, 0.1, 0.25, 0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 100], "cmap_accumulation": ["#ffffff", "#04e9e7", "#019ff4", "#0300f4", "#02fd02", "#01c501", "#008e00", "#fdf802", "#e5bc00", "#fd9500", "#fd0000", "#d40000", "#bc0000", "#f800fd"], "parameters_histogram": ["z_500", "tp", "2t", "10u", "10v"], "parameters_spectrum": ["z_500", "tp", "2t", "10u", "10v"], "parameter_groups": {"moisture": ["tp", "cp", "tcw"], "sfc_wind": ["10u", "10v"]}, "learned_features": false}, "debug": {"anomaly_detection": false}, "profiler": false, "checkpoint": {"every_n_minutes": {"save_frequency": 30, "num_models_saved": 3}, "every_n_epochs": {"save_frequency": 1, "num_models_saved": -1}, "every_n_train_steps": {"save_frequency": null, "num_models_saved": 0}}, "log": {"wandb": {"enabled": false, "offline": false, "log_model": false, "project": null, "entity": null, "gradients": false, 
"parameters": false}, "tensorboard": {"enabled": false}, "mlflow": {"enabled": false, "offline": false, "authentication": false, "log_model": false, "tracking_uri": null, "experiment_name": "", "project_name": "", "system": true, "terminal": true, "run_name": null, "on_resume_create_child": true}, "interval": 100}, "enable_progress_bar": true, "print_memory_summary": false}, "hardware": {"accelerator": "auto", "num_gpus_per_model": 2, "num_gpus_per_node": 2, "num_nodes": 1, "paths": {"data": "ai-ml/datasets/stable/", "graph": "data/anemoi-graphs/", "grids": "ai-ml/grids/", "output": "anemoi/n320/", "logs": {"base": "anemoi/n320/logs/", "wandb": "anemoi/n320/logs/", "mlflow": "anemoi/n320/logs/mlflow/", "tensorboard": "anemoi/n320/logs/tensorboard/"}, "checkpoints": "anemoi/n320/checkpoint/e6d5284b-2e07-43ab-abec-03cbe13e3dee", "plots": "anemoi/n320/plots/e6d5284b-2e07-43ab-abec-03cbe13e3dee", "profiler": "anemoi/n320/profiler/"}, "files": {"dataset": "aifs-od-an-oper-0001-mars-n320-2019-2021-6h-v4.zarr", "graph": "graph_mappings_normed_edge_attrs_20231211171329_n320_h_o96.pt", "checkpoint": {"every_n_epochs": "aifs-by_epoch-epoch_{epoch:03d}-val_wmse_{val_wmse:.3e}", "every_n_train_steps": "aifs-by_step-epoch_{epoch:03d}-step_{step:06d}", "every_n_minutes": "aifs-by_time-epoch_{epoch:03d}-step_{step:06d}"}, "warm_start": null}}}, "seed": 35142637, "run_id": "e6d5284b-2e07-43ab-abec-03cbe13e3dee", "dataset": {"version": "0.4.3", "shape": [2924, 101, 1, 542080], "arguments": {"args": [{"dataset": "aifs-od-an-oper-0001-mars-n320-2019-2021-6h-v4.zarr", "start": null, "end": 2020, "frequency": "6h", "drop": ["sd"]}], "kwargs": {}}, "specific": {"action": "select", "variables": ["q_50", "q_100", "q_150", "q_200", "q_250", "q_300", "q_400", "q_500", "q_600", "q_700", "q_850", "q_925", "q_1000", "t_50", "t_100", "t_150", "t_200", "t_250", "t_300", "t_400", "t_500", "t_600", "t_700", "t_850", "t_925", "t_1000", "u_50", "u_100", "u_150", "u_200", "u_250", "u_300", "u_400", 
"u_500", "u_600", "u_700", "u_850", "u_925", "u_1000", "v_50", "v_100", "v_150", "v_200", "v_250", "v_300", "v_400", "v_500", "v_600", "v_700", "v_850", "v_925", "v_1000", "w_50", "w_100", "w_150", "w_200", "w_250", "w_300", "w_400", "w_500", "w_600", "w_700", "w_850", "w_925", "w_1000", "z_50", "z_100", "z_150", "z_200", "z_250", "z_300", "z_400", "z_500", "z_600", "z_700", "z_850", "z_925", "z_1000", "z", "sp", "msl", "lsm", "sdor", "slor", "10u", "10v", "2t", "2d", "skt", "tcw", "cp", "tp", "cos_latitude", "cos_longitude", "sin_latitude", "sin_longitude", "cos_julian_day", "cos_local_time", "sin_julian_day", "sin_local_time", "insolation"], "shape": [2924, 101, 1, 542080], "frequency": 6, "start_date": "2019-01-01T00:00:00", "end_date": "2020-12-31T18:00:00", "forward": {"action": "subset", "variables": ["q_50", "q_100", "q_150", "q_200", "q_250", "q_300", "q_400", "q_500", "q_600", "q_700", "q_850", "q_925", "q_1000", "t_50", "t_100", "t_150", "t_200", "t_250", "t_300", "t_400", "t_500", "t_600", "t_700", "t_850", "t_925", "t_1000", "u_50", "u_100", "u_150", "u_200", "u_250", "u_300", "u_400", "u_500", "u_600", "u_700", "u_850", "u_925", "u_1000", "v_50", "v_100", "v_150", "v_200", "v_250", "v_300", "v_400", "v_500", "v_600", "v_700", "v_850", "v_925", "v_1000", "w_50", "w_100", "w_150", "w_200", "w_250", "w_300", "w_400", "w_500", "w_600", "w_700", "w_850", "w_925", "w_1000", "z_50", "z_100", "z_150", "z_200", "z_250", "z_300", "z_400", "z_500", "z_600", "z_700", "z_850", "z_925", "z_1000", "z", "sp", "msl", "lsm", "sdor", "slor", "10u", "10v", "2t", "2d", "skt", "sd", "tcw", "cp", "tp", "cos_latitude", "cos_longitude", "sin_latitude", "sin_longitude", "cos_julian_day", "cos_local_time", "sin_julian_day", "sin_local_time", "insolation"], "shape": [2924, 102, 1, 542080], "frequency": 6, "start_date": "2019-01-01T00:00:00", "end_date": "2020-12-31T18:00:00", "forward": {"action": "zarr", "variables": ["q_50", "q_100", "q_150", "q_200", "q_250", "q_300", "q_400", 
"q_500", "q_600", "q_700", "q_850", "q_925", "q_1000", "t_50", "t_100", "t_150", "t_200", "t_250", "t_300", "t_400", "t_500", "t_600", "t_700", "t_850", "t_925", "t_1000", "u_50", "u_100", "u_150", "u_200", "u_250", "u_300", "u_400", "u_500", "u_600", "u_700", "u_850", "u_925", "u_1000", "v_50", "v_100", "v_150", "v_200", "v_250", "v_300", "v_400", "v_500", "v_600", "v_700", "v_850", "v_925", "v_1000", "w_50", "w_100", "w_150", "w_200", "w_250", "w_300", "w_400", "w_500", "w_600", "w_700", "w_850", "w_925", "w_1000", "z_50", "z_100", "z_150", "z_200", "z_250", "z_300", "z_400", "z_500", "z_600", "z_700", "z_850", "z_925", "z_1000", "z", "sp", "msl", "lsm", "sdor", "slor", "10u", "10v", "2t", "2d", "skt", "sd", "tcw", "cp", "tp", "cos_latitude", "cos_longitude", "sin_latitude", "sin_longitude", "cos_julian_day", "cos_local_time", "sin_julian_day", "sin_local_time", "insolation"], "shape": [4384, 102, 1, 542080], "frequency": 6, "start_date": "2019-01-01T00:00:00", "end_date": "2021-12-31T18:00:00", "attrs": {"data_request": {"area": [89.785, 0.0, -89.785, 359.719], "grid": "N320", "param_level": {"pl": [["q", 50], ["q", 100], ["q", 150], ["q", 200], ["q", 250], ["q", 300], ["q", 400], ["q", 500], ["q", 600], ["q", 700], ["q", 850], ["q", 925], ["q", 1000], ["t", 50], ["t", 100], ["t", 150], ["t", 200], ["t", 250], ["t", 300], ["t", 400], ["t", 500], ["t", 600], ["t", 700], ["t", 850], ["t", 925], ["t", 1000], ["u", 50], ["u", 100], ["u", 150], ["u", 200], ["u", 250], ["u", 300], ["u", 400], ["u", 500], ["u", 600], ["u", 700], ["u", 850], ["u", 925], ["u", 1000], ["v", 50], ["v", 100], ["v", 150], ["v", 200], ["v", 250], ["v", 300], ["v", 400], ["v", 500], ["v", 600], ["v", 700], ["v", 850], ["v", 925], ["v", 1000], ["w", 50], ["w", 100], ["w", 150], ["w", 200], ["w", 250], ["w", 300], ["w", 400], ["w", 500], ["w", 600], ["w", 700], ["w", 850], ["w", 925], ["w", 1000], ["z", 50], ["z", 100], ["z", 150], ["z", 200], ["z", 250], ["z", 300], ["z", 400], ["z", 500], 
["z", 600], ["z", 700], ["z", 850], ["z", 925], ["z", 1000]], "sfc": ["10u", "10v", "2d", "2t", "cp", "lsm", "msl", "sd", "sdor", "skt", "slor", "sp", "tcw", "tp", "z"]}, "param_step": {"sfc": [["cp", 6], ["tp", 6]]}}, "description": "stable version of the dataset from 2019 to 2021, resolution n320.", "end_date": "2021-12-31T18:00:00", "ensemble_dimension": 2, "flatten_grid": true, "frequency": 6, "history": [{"action": "initialised", "timestamp": "2023-12-12T21:37:37.414738"}, {"action": "statistics_registry_initialised", "timestamp": "2023-12-12T21:37:37.421775", "version": 2}, {"action": "init finished", "timestamp": "2023-12-12T21:37:37.427636"}, {"action": "compute_statistics_end", "end": "2020-12-31T18:00:00", "i_end": 2923, "i_start": 0, "start": "2019-01-01T00:00:00", "timestamp": "2023-12-14T14:23:05.910668"}], "latest_write_timestamp": "2023-12-14T14:22:46.620644", "order_by": {"number": "ascending", "param_level": ["q_50", "q_100", "q_150", "q_200", "q_250", "q_300", "q_400", "q_500", "q_600", "q_700", "q_850", "q_925", "q_1000", "t_50", "t_100", "t_150", "t_200", "t_250", "t_300", "t_400", "t_500", "t_600", "t_700", "t_850", "t_925", "t_1000", "u_50", "u_100", "u_150", "u_200", "u_250", "u_300", "u_400", "u_500", "u_600", "u_700", "u_850", "u_925", "u_1000", "v_50", "v_100", "v_150", "v_200", "v_250", "v_300", "v_400", "v_500", "v_600", "v_700", "v_850", "v_925", "v_1000", "w_50", "w_100", "w_150", "w_200", "w_250", "w_300", "w_400", "w_500", "w_600", "w_700", "w_850", "w_925", "w_1000", "z_50", "z_100", "z_150", "z_200", "z_250", "z_300", "z_400", "z_500", "z_600", "z_700", "z_850", "z_925", "z_1000", "z", "sp", "msl", "lsm", "sdor", "slor", "10u", "10v", "2t", "2d", "skt", "sd", "tcw", "cp", "tp", "cos_latitude", "cos_longitude", "sin_latitude", "sin_longitude", "cos_julian_day", "cos_local_time", "sin_julian_day", "sin_local_time", "insolation"], "valid_datetime": "ascending"}, "provenance_load": {"git_versions": {"__main__": {"git": 
{"modified_files": 0, "sha1": "40c64a60e0bf7f3291de947ba5759e1a84c0927e", "untracked_files": 1}}, "__mp_main__": {"git": {"modified_files": 0, "sha1": "40c64a60e0bf7f3291de947ba5759e1a84c0927e", "untracked_files": 1}}, "climetlab": {"git": {"modified_files": 11, "sha1": "867967ca395a5ae7bca5817c9c5b7df115a4a5f3", "untracked_files": 1}}, "ecml_tools": {"git": {"modified_files": 2, "sha1": "e7d495669b8321cccd3701536931867ef6874baf", "untracked_files": 1}}, "prepml": {"git": {"modified_files": 0, "sha1": "40c64a60e0bf7f3291de947ba5759e1a84c0927e", "untracked_files": 1}}, "pyflow": {"git": {"modified_files": 0, "sha1": "6a436506a8492519fb810ec7867624fdba298de5", "untracked_files": 0}}}, "module_versions": {"__main__": "0.44.0", "__mp_main__": "0.44.0", "_cffi_backend": "1.15.1", "_csv": "1.0", "_ctypes": "1.1.0", "_decimal": "1.70", "argparse": "1.1", "arrow": "1.2.3", "attr": "23.1.0", "attrs": "23.1.0", "certifi": "2022.12.07", "cffi": "1.15.1", "cgi": "2.6", "chardet": "4.0.0", "charset_normalizer": "3.1.0", "climetlab": "0.19.0", "csv": "1.0", "ctypes": "1.1.0", "dateutil": "2.8.2", "decimal": "1.70", "earthkit.meteo": "0.0.1", "eccodes": "2.30.0", "ecflow": "5.11.4", "ecml_tools": "0.1.8", "ecmwf.opendata": "0.2.0", "ecmwfapi": "1.6.3", "ecmwflibs": "0.5.3", "entrypoints": "0.4", "fasteners": "0.18", "filelock": "3.12.0", "findlibs": "0.0.5", "fsspec": "2023.9.2", "gribapi": "2.30.0", "idna": "2.10", "ipaddress": "1.0", "jinja2": "3.1.2", "json": "2.0.9", "jsonpointer": "2.4", "jsonschema": "4.19.0", "logging": "0.5.1.2", "markdown": "3.4.3", "markupsafe": "2.1.2", "multiurl": "0.2.1", "numcodecs": "0.11.0", "numpy": "1.23.5", "pandas": "2.0.1", "platform": "1.0.8", "prepml": "0.44.0", "pycparser": "2.21", "pyflow": "3.1.6", "pytz": "2023.3", "re": "2.2.1", "requests": "2.31.0", "rfc3339_validator": "0.1.4", "rfc3986_validator": "0.1.1", "scipy": "1.10.1", "semantic_version": "2.10.0", "six": "1.16.0", "socks": "1.7.1", "tqdm": "4.65.0", "urllib3": "1.26.15", 
"webcolors": "1.13", "yaml": "6.0", "zarr": "2.16.1", "zlib": "1.0"}, "python": "3.10.10", "time": "2023-12-14T14:22:47.682655"}, "remapping": {"param_level": "{param}_{levelist}"}, "resolution": "N320", "start_date": "2019-01-01T00:00:00", "statistics_end_date": "2020-12-31T18:00:00", "statistics_start_date": "2019-01-01T00:00:00", "total_number_of_files": 4471, "total_size": 502783857898, "uuid": "da0f9216-ba5e-4441-8caa-63f935f4008c", "variables": ["q_50", "q_100", "q_150", "q_200", "q_250", "q_300", "q_400", "q_500", "q_600", "q_700", "q_850", "q_925", "q_1000", "t_50", "t_100", "t_150", "t_200", "t_250", "t_300", "t_400", "t_500", "t_600", "t_700", "t_850", "t_925", "t_1000", "u_50", "u_100", "u_150", "u_200", "u_250", "u_300", "u_400", "u_500", "u_600", "u_700", "u_850", "u_925", "u_1000", "v_50", "v_100", "v_150", "v_200", "v_250", "v_300", "v_400", "v_500", "v_600", "v_700", "v_850", "v_925", "v_1000", "w_50", "w_100", "w_150", "w_200", "w_250", "w_300", "w_400", "w_500", "w_600", "w_700", "w_850", "w_925", "w_1000", "z_50", "z_100", "z_150", "z_200", "z_250", "z_300", "z_400", "z_500", "z_600", "z_700", "z_850", "z_925", "z_1000", "z", "sp", "msl", "lsm", "sdor", "slor", "10u", "10v", "2t", "2d", "skt", "sd", "tcw", "cp", "tp", "cos_latitude", "cos_longitude", "sin_latitude", "sin_longitude", "cos_julian_day", "cos_local_time", "sin_julian_day", "sin_local_time", "insolation"], "version": "0.14"}, "chunks": [1, 102, 1, 542080], "dtype": "float32"}, "reason": {"frequency": "6h", "end": 2020}}, "reason": {"drop": ["sd"]}, "indices": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101]}, "frequency": 6, "variables": 
["q_50", "q_100", "q_150", "q_200", "q_250", "q_300", "q_400", "q_500", "q_600", "q_700", "q_850", "q_925", "q_1000", "t_50", "t_100", "t_150", "t_200", "t_250", "t_300", "t_400", "t_500", "t_600", "t_700", "t_850", "t_925", "t_1000", "u_50", "u_100", "u_150", "u_200", "u_250", "u_300", "u_400", "u_500", "u_600", "u_700", "u_850", "u_925", "u_1000", "v_50", "v_100", "v_150", "v_200", "v_250", "v_300", "v_400", "v_500", "v_600", "v_700", "v_850", "v_925", "v_1000", "w_50", "w_100", "w_150", "w_200", "w_250", "w_300", "w_400", "w_500", "w_600", "w_700", "w_850", "w_925", "w_1000", "z_50", "z_100", "z_150", "z_200", "z_250", "z_300", "z_400", "z_500", "z_600", "z_700", "z_850", "z_925", "z_1000", "z", "sp", "msl", "lsm", "sdor", "slor", "10u", "10v", "2t", "2d", "skt", "tcw", "cp", "tp", "cos_latitude", "cos_longitude", "sin_latitude", "sin_longitude", "cos_julian_day", "cos_local_time", "sin_julian_day", "sin_local_time", "insolation"], "start_date": "2019-01-01T00:00:00", "end_date": "2020-12-31T18:00:00"}, "data_indices": {"data": {"input": {"full": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 92, 93, 94, 95, 96, 97, 98, 99, 100], "prognostic": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 79, 80, 84, 85, 86, 87, 88, 89], "diagnostic": [90, 91], "forcing": [78, 81, 82, 83, 92, 93, 94, 95, 96, 97, 98, 99, 100]}, "output": {"full": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 79, 80, 84, 85, 86, 87, 88, 89, 90, 91], "prognostic": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 79, 80, 84, 85, 86, 87, 88, 89], "diagnostic": [90, 91], "forcing": [78, 81, 82, 83, 92, 93, 94, 95, 96, 97, 98, 99, 100]}}, "model": {"input": {"full": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98], "prognostic": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 79, 80, 84, 85, 86, 87, 88, 89], "diagnostic": [], "forcing": [78, 81, 82, 83, 90, 91, 92, 93, 94, 95, 96, 97, 98]}, "output": {"full": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87], "prognostic": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85], "diagnostic": [86, 87], "forcing": []}}}, "provenance_training": {"time": "2024-08-08T14:27:30.343770", "python": "3.10.14", "module_versions": {"PIL": "10.3.0", "_csv": "1.0", "_ctypes": "1.1.0", "_decimal": "1.70", "_remote_module_non_scriptable": ".../_remote_module_non_scriptable.py", "argparse": "1.1", "astropy": "6.1.0", "attr": "23.2.0", "certifi": "2024.06.02", "cffi": "1.16.0", "cftime": "1.6.3", "charset_normalizer": "3.3.2", "cloudpickle": "3.0.0", "csv": "1.0", "ctypes": "1.1.0", "cycler": "0.12.1", "dateutil": "2.9.0", "decimal": "1.70", "defusedxml": "0.7.1", "einops": "0.6.1", "erfa": "2.0.1.4", "filelock": "3.14.0", "fsspec": "2024.6.0", "flash_attn": "2.6.3", "huggingface_hub": "0.23.3", "hydra": "1.3.0", "idna": "3.7", "ipaddress": "1.0", "json": "2.0.9", "kiwisolver": "1.4.5", "lightning_fabric": "2.2.5", "lightning_utilities": "0.11.2", "logging": "0.5.1.2", "matplotlib": "3.9.0", "mpmath": "1.3.0", "numcodecs": "0.12.1", "numpy": "1.26.4", "omegaconf": "2.3.0", "onnx": "1.16.1", "onnxruntime": "1.18.0", "packaging": "24.0", "pandas": "2.2.2", "platform": "1.0.8", "platformdirs": "4.2.2", "pooch": "v1.8.2", "psutil": "5.9.8", "pyarrow": "15.0.2", "pynvml": "11.5.0", "pyparsing": "3.1.2", "pyshtools": "4.12.2", "pytorch_lightning": "2.2.5", "pytz": "2024.1", "re": "2.2.1", "requests": "2.32.3", "safetensors": "0.4.3", "scipy": "1.13.1", "six": "1.16.0", "socketserver": "0.4", "sympy": "1.12.1", "timm": "1.0.3", "tomli": "2.0.1", "torch": "2.3.0+cu121", "torch_geometric": "2.4.0", "torchinfo": "1.8.0", "torchmetrics": "1.4.0.post0", "torchvision": "0.18.0+cu121", "tqdm": "4.66.4", "urllib3": "2.2.1", "xarray": "2024.5.0", "yaml": "6.0.1", "zarr": "2.17.0", "zlib": "1.0", 
"anemoi.datasets": "0.4.3", "anemoi.models": "0.2.0", "anemoi.training": "0.1.dev169+g9156f97.d20240808", "anemoi.utils": "0.3.14.dev2+gb3dc864", "google.protobuf": "4.25.3", "hydra_plugins.anemoi_searchpath": ".../__init__.py"}, "git_versions": {"hydra_plugins.anemoi_searchpath": {"git": {"sha1": "b9b15e2567c3f72677d653977645158338045ad1", "modified_files": 0, "untracked_files": 0}}, "anemoi.models": {"git": {"sha1": "a34cb8b26748844a0b704c6eb14c8f8c69536d6b", "modified_files": 0, "untracked_files": 0}}, "anemoi.training": {"git": {"sha1": "b9b15e2567c3f72677d653977645158338045ad1", "modified_files": 0, "untracked_files": 0}}}}, "timestamp": "2024-08-08T14:27:30.343794+00:00", "uuid": "b0af7aad-a375-4550-a0a6-7b0f63546726", "model": {"model": "AnemoiModelInterface", "trainable_parameters": 254909000, "total_parameters": 254909000, "summary": "===============================================================================================\nLayer (type (var_name)) Param #\n===============================================================================================\nAnemoiModelInterface (AnemoiModelInterface) --\n\u251c\u2500Processors (pre_processors) --\n\u2502 \u2514\u2500ModuleDict (processors) --\n\u2502 \u2502 \u2514\u2500InputNormalizer (normalizer) --\n\u251c\u2500Processors (post_processors) --\n\u2502 \u2514\u2500ModuleDict (processors) --\n\u2502 \u2502 \u2514\u2500InputNormalizer (normalizer) --\n\u251c\u2500AnemoiModelEncProcDec (model) --\n\u2502 \u2514\u2500TrainableTensor (trainable_data) 4,336,640\n\u2502 \u2514\u2500TrainableTensor (trainable_hidden) 322,560\n\u2502 \u2514\u2500GraphTransformerForwardMapper (encoder) --\n\u2502 \u2502 \u2514\u2500TrainableTensor (trainable) 7,891,440\n\u2502 \u2502 \u2514\u2500GraphTransformerMapperBlock (proc) --\n\u2502 \u2502 \u2502 \u2514\u2500Linear (lin_key) 1,049,600\n\u2502 \u2502 \u2502 \u2514\u2500Linear (lin_query) 1,049,600\n\u2502 \u2502 \u2502 \u2514\u2500Linear (lin_value) 1,049,600\n\u2502 \u2502 
\u2502 \u2514\u2500Linear (lin_self) 1,049,600\n\u2502 \u2502 \u2502 \u2514\u2500Linear (lin_edge) 12,288\n\u2502 \u2502 \u2502 \u2514\u2500GraphTransformerConv (conv) --\n\u2502 \u2502 \u2502 \u2502 \u2514\u2500SumAggregation (aggr_module) --\n\u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2514\u2500Sequential (node_dst_mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (0) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (1) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (2) --\n\u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (3) 4,195,328\n\u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2514\u2500Linear (emb_nodes_dst) 13,312\n\u2502 \u2502 \u2514\u2500Linear (emb_nodes_src) 216,064\n\u2502 \u2514\u2500TransformerProcessor (processor) --\n\u2502 \u2502 \u2514\u2500ModuleList (proc) --\n\u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorChunk (0) --\n\u2502 \u2502 \u2502 \u2502 \u2514\u2500ModuleList (blocks) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (0) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 
\u2514\u2500TransformerProcessorBlock (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (2) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (3) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 
\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (4) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (5) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) 
--\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (6) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (7) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorChunk (1) --\n\u2502 \u2502 \u2502 \u2502 \u2514\u2500ModuleList (blocks) --\n\u2502 \u2502 
\u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (0) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (2) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear 
(lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (3) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (4) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 
\u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (5) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (6) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500TransformerProcessorBlock (7) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 
\u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500MultiHeadSelfAttention (attention) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (lin_qkv) 3,145,728\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Sequential (mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (0) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (1) --\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (2) 4,195,328\n\u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2514\u2500GraphTransformerBackwardMapper (decoder) --\n\u2502 \u2502 \u2514\u2500TrainableTensor (trainable) 13,009,920\n\u2502 \u2502 \u2514\u2500GraphTransformerMapperBlock (proc) --\n\u2502 \u2502 \u2502 \u2514\u2500Linear (lin_key) 1,049,600\n\u2502 \u2502 \u2502 \u2514\u2500Linear (lin_query) 1,049,600\n\u2502 \u2502 \u2502 \u2514\u2500Linear (lin_value) 1,049,600\n\u2502 \u2502 \u2502 \u2514\u2500Linear (lin_self) 1,049,600\n\u2502 \u2502 \u2502 \u2514\u2500Linear (lin_edge) 12,288\n\u2502 \u2502 \u2502 \u2514\u2500GraphTransformerConv (conv) --\n\u2502 \u2502 \u2502 \u2502 \u2514\u2500SumAggregation (aggr_module) --\n\u2502 \u2502 \u2502 \u2514\u2500Linear (projection) 1,049,600\n\u2502 \u2502 \u2502 \u2514\u2500Sequential (node_dst_mlp) --\n\u2502 \u2502 \u2502 \u2502 \u2514\u2500LayerNorm (0) 2,048\n\u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (1) 4,198,400\n\u2502 \u2502 \u2502 \u2502 \u2514\u2500GELU (2) --\n\u2502 \u2502 \u2502 \u2502 \u2514\u2500Linear (3) 4,195,328\n\u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm1) 2,048\n\u2502 \u2502 \u2502 \u2514\u2500LayerNorm (layer_norm2) 2,048\n\u2502 \u2502 \u2514\u2500Linear (emb_nodes_dst) 216,064\n\u2502 \u2502 \u2514\u2500Sequential (node_data_extractor) --\n\u2502 \u2502 \u2502 
\u2514\u2500LayerNorm (0) 2,048\n\u2502 \u2502 \u2502 \u2514\u2500Linear (1) 90,200\n===============================================================================================\nTotal params: 254,909,000\nTrainable params: 254,909,000\nNon-trainable params: 0\n==============================================================================================="}, "tracker": {"null": null}, "training": {"current_epoch": 0, "global_step": 1, "elapsed_time": 57.73511028289795}} \ No newline at end of file diff --git a/fiab/structure.yaml b/fiab/structure.yaml new file mode 100644 index 0000000..3d5b37f --- /dev/null +++ b/fiab/structure.yaml @@ -0,0 +1,67 @@ +# Format: list of models, each model has a model_outputs field which contains a nested tree of nodes +# Nodes have {node: name, cube: list of key value(s) pairs, children: list[nodes]} + +- model: surface_and_atmosphere_model + model_outputs: + - node: root + cube: + class: rd + stream: anemoi + expver: something + lead_time: + type: datetime + format: '%Y-%m-%d %H:%M:%S' + step: 6h + + + children: + - node: pressure_variables + other_metadata: something + cube: + param: ['q', 't', 'u', 'v', 'w', 'z'] + level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000] + + + - node: surface_variables + other_metadata: something + cube: + param: ['sp', 'msl', '10u', '10v', '2t', '2d', 'skt', 'tcw', 'cp', 'tp'] + + # Hypothetical Ocean variables + - node: ocean_variables + cube: + param: ["saltiness", "number of eels", "is_blue", "good_for_surfing"] + ocean_levels: [??, ??] 
+ + # Alternative List of cubes format + - model: surface_and_atmosphere_model + model_outputs: + - node: root + cube: + class: rd + stream: anemoi + expver: something + lead_time: + type: datetime + format: '%Y-%m-%d %H:%M:%S' + step: 6h + + + children: + - node: pressure_variables + other_metadata: something + cube: + param: ['q', 't', 'u', 'v', 'w', 'z'] + level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000] + + + - node: surface_variables + other_metadata: something + cube: + param: ['sp', 'msl', '10u', '10v', '2t', '2d', 'skt', 'tcw', 'cp', 'tp'] + + # Hypothetical Ocean variables + - node: ocean_variables + cube: + param: ["saltiness", "number of eels", "is_blue", "good_for_surfing"] + ocean_levels: [??, ??] \ No newline at end of file diff --git a/src/python/qubed/Qube.py b/src/python/qubed/Qube.py index 34616a5..b649629 100644 --- a/src/python/qubed/Qube.py +++ b/src/python/qubed/Qube.py @@ -66,11 +66,21 @@ class Qube: return Qube.make( key=json["key"], values=values_from_json(json["values"]), - metadata=json["metadata"] if "metadata" in json else {}, + metadata=frozendict(json["metadata"]) if "metadata" in json else {}, children=(from_json(c) for c in json["children"]), ) return from_json(json) + def to_json(self) -> dict: + def to_json(node: Qube) -> dict: + return { + "key": node.key, + "values": node.values.to_json(), + "metadata": dict(node.metadata), + "children": [to_json(c) for c in node.children] + } + return to_json(self) + @classmethod def from_dict(cls, d: dict) -> 'Qube': def from_dict(d: dict) -> list[Qube]: @@ -102,6 +112,12 @@ class Qube: def _repr_html_(self) -> str: return node_tree_to_html(self, depth = 2, collapse = True) + # Allow "key=value/value" / qube to prepend keys + def __rtruediv__(self, other: str) -> "Qube": + key, values = other.split("=") + values = QEnum((values.split("/"))) + return Qube.root_node([Qube.make(key, values, self.children)]) + def __or__(self, other: "Qube") -> "Qube": return 
set_operations.operation(self, other, set_operations.SetOperation.UNION, type(self)) @@ -176,7 +192,7 @@ class Qube: return dataclasses.replace(self, children = children) - def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed") -> 'Qube': + def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed", prune=True) -> 'Qube': # make all values lists selection = {k : v if isinstance(v, list) else [v] for k,v in selection.items()} @@ -187,7 +203,15 @@ class Qube: if node.key not in selection: if mode == "strict": return None - return dataclasses.replace(node, children = not_none(select(c) for c in node.children)) + + new_children = not_none(select(c) for c in node.children) + + # prune==true then remove any non-leaf nodes + # which have had all their children removed + if prune and node.children and not new_children: + return None + + return dataclasses.replace(node, children = new_children) # If the key is specified, check if any of the values match values = QEnum((c for c in selection[node.key] if c in node.values)) diff --git a/src/python/qubed/value_types.py b/src/python/qubed/value_types.py index 816cb2a..2985d41 100644 --- a/src/python/qubed/value_types.py +++ b/src/python/qubed/value_types.py @@ -30,6 +30,10 @@ class Values(ABC): def min(self): pass + @abstractmethod + def to_json(self): + pass + T = TypeVar("T") EnumValuesType = FrozenSet[T] @dataclass(frozen=True, order=True) @@ -61,6 +65,8 @@ class QEnum(Values): return [type(self)(tuple(values))] def min(self): return min(self.values) + def to_json(self): + return list(self.values) @dataclass(frozen=True) class Range(Values, ABC): diff --git a/tests/test_conversions.py b/tests/test_conversions.py new file mode 100644 index 0000000..4d96cfd --- /dev/null +++ b/tests/test_conversions.py @@ -0,0 +1,16 @@ +from qubed import Qube + + +def test_json_round_trip(): + u = Qube.from_dict({ + "class=d1" : { + 
"dataset=climate-dt/weather-dt" : { + "generation=1/2/3/4" : {}, + }, + "dataset=another-value" : { + "generation=1/2/3" : {}, + }, + } + }) + json = u.to_json() + assert Qube.from_json(json) == u \ No newline at end of file diff --git a/tests/test_iteration.py b/tests/test_iteration.py index ae7f881..a997bf6 100644 --- a/tests/test_iteration.py +++ b/tests/test_iteration.py @@ -16,20 +16,4 @@ def test_iter_leaves_simple(): {"a" : '2', "b" : '2'}, ] - assert set(make_hashable(q.leaves())) == set(make_hashable(entries)) - -# def test_iter_leaves(): -# d = { -# "class=od" : { -# "expver=0001": {"param=1":{}, "param=2":{}}, -# "expver=0002": {"param=1":{}, "param=2":{}}, -# }, -# "class=rd" : { -# "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}}, -# "expver=0002": {"param=1":{}, "param=2":{}}, -# }, -# } -# q = Qube.from_dict(d) -# r = Qube.from_dict(d) - -# assert q == r \ No newline at end of file + assert set(make_hashable(q.leaves())) == set(make_hashable(entries)) \ No newline at end of file