qubed/notebooks/test.ipynb
2025-02-12 12:44:30 +00:00

656 lines
21 KiB
Plaintext
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d4ca1d75-6dec-48d3-a448-d46bb0d65602",
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d9966f80-7bd3-4404-920e-c8262f304a02",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <style>\n",
" .qubed-tree-view {\n",
" font-family: monospace;\n",
" white-space: pre;\n",
" }\n",
" .qubed-tree-view details {\n",
" # display: inline;\n",
" margin-left: 0;\n",
" }\n",
" .qubed-tree-view summary {\n",
" list-style: none;\n",
" cursor: pointer;\n",
" text-overflow: ellipsis;\n",
" overflow: hidden;\n",
" text-wrap: nowrap;\n",
" display: block;\n",
" }\n",
"\n",
" .qubed-tree-view .leaf {\n",
" text-overflow: ellipsis;\n",
" overflow: hidden;\n",
" text-wrap: nowrap;\n",
" display: block;\n",
" }\n",
"\n",
" .qubed-tree-view summary:hover,span.leaf:hover {\n",
" background-color: #f0f0f0;\n",
" }\n",
" .qubed-tree-view details > summary::after {\n",
" content: ' ▲';\n",
" }\n",
" .qubed-tree-view details:not([open]) > summary::after {\n",
" content: \" ▼\";\n",
" }\n",
" </style>\n",
"\n",
" <pre class='qubed-tree-view'><details open><summary>root</summary><details open><summary>├── class=od</summary><details ><summary>│ ├── expver=0001/0002/0003</summary><span class=\"leaf\">│ │ ├── param=1</span><span class=\"leaf\">│ │ └── param=2</span></details><details ><summary>│ └── expver=0003/0004</summary><span class=\"leaf\">│ ├── param=1</span><span class=\"leaf\">│ └── param=2</span></details></details><details open><summary>└── class=rd</summary><details ><summary> ├── expver=0001/0002</summary><span class=\"leaf\"> │ ├── param=1</span><span class=\"leaf\"> │ └── param=2</span></details><details ><summary> └── expver=0003/0004</summary><span class=\"leaf\"> ├── param=1</span><span class=\"leaf\"> └── param=2</span></details></details></details></pre>"
],
"text/plain": [
"Qube(data=NodeData(key='root', values=Enum(values=('root',)), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='class', values=Enum(values=('od',)), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='expver', values=Enum(values=('0001', '0002', '0003')), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='param', values=Enum(values=('1',)), metadata=frozendict.frozendict({})), children=()), Qube(data=NodeData(key='param', values=Enum(values=('2',)), metadata=frozendict.frozendict({})), children=()))), Qube(data=NodeData(key='expver', values=Enum(values=('0003', '0004')), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='param', values=Enum(values=('1',)), metadata=frozendict.frozendict({})), children=()), Qube(data=NodeData(key='param', values=Enum(values=('2',)), metadata=frozendict.frozendict({})), children=()))))), Qube(data=NodeData(key='class', values=Enum(values=('rd',)), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='expver', values=Enum(values=('0001', '0002')), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='param', values=Enum(values=('1',)), metadata=frozendict.frozendict({})), children=()), Qube(data=NodeData(key='param', values=Enum(values=('2',)), metadata=frozendict.frozendict({})), children=()))), Qube(data=NodeData(key='expver', values=Enum(values=('0003', '0004')), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='param', values=Enum(values=('1',)), metadata=frozendict.frozendict({})), children=()), Qube(data=NodeData(key='param', values=Enum(values=('2',)), metadata=frozendict.frozendict({})), children=())))))))"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from qubed import Qube\n",
"\n",
"q = Qube.from_dict({\n",
" \"class=od\" : {\n",
" \"expver=0001/0002/0003\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
" \"class=rd\" : {\n",
" \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
"})\n",
"\n",
"q"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "261f32c8-74c6-4cc9-9000-bf9bf9ff3456",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"root\n",
"├── class=od\n",
"│ ├── expver=0001\n",
"│ │ ├── param=1\n",
"│ │ └── param=2\n",
"│ └── expver=0002\n",
"│ ├── param=1\n",
"│ └── param=2\n",
"└── class=rd\n",
" ├── expver=0001\n",
" │ ├── param=1\n",
" │ ├── param=2\n",
" │ └── param=3\n",
" └── expver=0002\n",
" ├── param=1\n",
" └── param=2\n",
"\n"
]
}
],
"source": [
"from qubed import Qube\n",
"\n",
"q = Qube.from_dict({\n",
" \"class=od\" : {\n",
" \"expver=0001\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
" \"class=rd\" : {\n",
" \"expver=0001\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
" \"expver=0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
"})\n",
"\n",
"q.print()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "7d1b353c-44a6-45be-bd02-2116771ed84d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"root\n",
"├── class=od, expver=0001/0002, param=1/2\n",
"└── class=rd\n",
" ├── expver=0001, param=1/2/3\n",
" └── expver=0002, param=1/2\n",
"\n"
]
}
],
"source": [
"q.compress().print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "73193a0d-2d0d-4d64-9f3c-fc5f5dfe7c5e",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"from tree_traverser.DataCubeTree import Tree\n",
"from tree_traverser.CompressedDataCubeTree import CompressedTree\n",
"\n",
"t1 = Tree.from_dict({\n",
" \"class=od\" : {\n",
" \"expver=0001/0002/0003\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
" \"class=rd\" : {\n",
" \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
" # \"class=cd\" : {\n",
" # \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" # \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" # \"expver=0005\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
" # }\n",
"})\n",
"\n",
"print(str(t1))\n",
"\n",
"ct = CompressedTree.from_tree(t1).compress()\n",
"print(str(ct))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c025d24e-e769-499e-90c3-efc836c4b399",
"metadata": {},
"outputs": [],
"source": [
"def tree_from_dict(d):\n",
"    \"\"\"Nest a flat mapping into a single chain of 'key=v1/v2' labels.\n",
"\n",
"    Keys are taken in insertion order: the first key labels the outermost\n",
"    level and each later key labels the only child of the previous one.\n",
"    A value that is not a list is treated as a one-element list, and every\n",
"    value is passed through str() before being joined with '/'.\n",
"    An empty (or None) mapping yields {}.\n",
"\n",
"    The input mapping is only read, never modified, so the same dict can be\n",
"    passed again and the cell can be re-run.\n",
"    \"\"\"\n",
"    if not d:\n",
"        return {}\n",
"    nested = {}\n",
"    # Build inside-out so the first key ends up as the outermost level.\n",
"    for key, values in reversed(list(d.items())):\n",
"        if not isinstance(values, list):\n",
"            values = [values]\n",
"        nested = {f\"{key}={'/'.join(map(str, values))}\": nested}\n",
"    return nested\n",
"\n",
"t1 = Tree.from_dict(tree_from_dict({\n",
"    'class': ['d1', 'd2'],\n",
"    'dataset': 'climate-dt',\n",
"    'generation': ['1','2','3'],\n",
"    'model': 'icon',\n",
"    'date': ['20241102','20241103'],\n",
"    'resolution': ['high','low'],\n",
"    'time': ['0000', '0600', '1200', '1800'],\n",
"}))\n",
"print(str(t1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c85ba0e3-6754-4227-8dd5-ccca57714420",
"metadata": {},
"outputs": [],
"source": [
"t1 = Tree.from_dict({\n",
" \"class=od\" : {\n",
" \"expver=0001\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
" \"class=rd\" : {\n",
" \"expver=0001\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
" \"expver=0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
"})\n",
"\n",
"t1.print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "17145fb0-82ec-4716-9eca-83fdae4fbcdf",
"metadata": {},
"outputs": [],
"source": [
"ct = CompressedTree.from_tree(t1).compress()\n",
"ct.print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f70e9e53-326a-4158-bf62-339fd765e5f8",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"import orjson as json\n",
"from tree_traverser.DataCubeTree import Tree\n",
"# from tree_traverser.CompressedDataCubeTree import CompressedTree\n",
"\n",
"t1 = Tree.from_dict({\n",
" \"class=od\" : {\n",
" \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
" \"class=rd\" : {\n",
" \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
" \"class=cd\" : {\n",
" \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0005\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
" }\n",
"})\n",
"\n",
"t1.html(depth=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f831008-7e36-4c51-9cf8-b10af69b35c0",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"import orjson as json\n",
"from tree_traverser.DataCubeTree import Tree\n",
"from tree_traverser.CompressedDataCubeTree import CompressedTree\n",
"\n",
"t1 = Tree.from_dict({\n",
" \"class=od\" : {\n",
" \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
" \"class=rd\" : {\n",
" \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
" \"class=cd\" : {\n",
" \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0005\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
" }\n",
"})\n",
"\n",
"t1.html(depth=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "49d0f344-34ad-476b-bb27-7441b9d8cddb",
"metadata": {},
"outputs": [],
"source": [
"t1 = Tree.empty()\n",
"\n",
"t1 = t1.insert(dict(a = [1,2,3], b = [2,3,4]))\n",
"t1"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cdab9360-f465-4570-9670-29a9567dc962",
"metadata": {},
"outputs": [],
"source": [
"t1 = t1.insert(dict(a = [4,5,6], b = [2,3,4]))\n",
"t1"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4189ea19-793d-46e0-bbd6-550a14c9626c",
"metadata": {},
"outputs": [],
"source": [
"t1 = t1.insert(dict(a = [1], b = [5]))\n",
"t1"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1564a02-9d47-43af-990d-54493a76e029",
"metadata": {},
"outputs": [],
"source": [
"ct = CompressedTree.from_tree(t1).compress()\n",
"ct.html(debug = False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f50d6f52-b417-4dae-8b94-a4fe5535d0ca",
"metadata": {},
"outputs": [],
"source": [
"t1 = Tree.from_dict({\n",
" \"class=od\" : {\n",
" \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
" \"class=rd\" : {\n",
" \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" },\n",
" \"class=cd\" : {\n",
" \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
" \"expver=0005\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
" }\n",
"})\n",
"\n",
"t1.html(depth=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "af63c581-aa28-4f35-be7f-0044ccdce671",
"metadata": {},
"outputs": [],
"source": [
"ct = CompressedTree.from_tree(t1).compress()\n",
"ct.html(debug = False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15ec656b-d7fa-45f9-97a4-0615ee8104ee",
"metadata": {},
"outputs": [],
"source": [
"ct = CompressedTree.from_tree(t1).compress()\n",
"print(str(ct))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eeb388cd-d9e4-4959-97fa-c8874abf68d9",
"metadata": {},
"outputs": [],
"source": [
"ct.lookup({\n",
" \"class\" : \"rd\"\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "52a6a4de-ca76-48a0-8931-ea3f0569d3a4",
"metadata": {},
"outputs": [],
"source": [
"# Print the summary of every cached node, ordered by cache key.\n",
"for k in sorted(ct.cache):\n",
"    node = ct.cache[k]\n",
"    # Reuse the node fetched above instead of indexing the cache a second time.\n",
"    print(k, node.summary())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea5ee804-740b-435d-9455-b22ac6ba07db",
"metadata": {},
"outputs": [],
"source": [
"# Peek at the first record of keys.txt.\n",
"with open(\"keys.txt\", \"r\") as f:\n",
"    # The loop stops after one record, so iterate the file lazily rather than\n",
"    # loading every line into memory with readlines().\n",
"    for line in f:\n",
"        print(line)\n",
"        # NOTE(review): the quote swap presumably converts single-quoted records\n",
"        # into valid JSON; it would corrupt values containing apostrophes - confirm.\n",
"        j = json.loads(line.replace(\"'\", '\"'))\n",
"        path = j[\"path\"]\n",
"        keys = j[\"keys\"]\n",
"        offset = j[\"offset\"]\n",
"\n",
"        print(path, keys)\n",
"        break"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf2d2099-98d4-4e41-9fb9-1f2f4cbe3bd9",
"metadata": {},
"outputs": [],
"source": [
"ct[\"class\", \"od\"][\"expver\", \"0001\"].root._children, ct[\"class\", \"od\"][\"expver\", \"0003\"].root._children, "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9cb2619b-a0aa-4a1e-970b-04bf5da8a784",
"metadata": {},
"outputs": [],
"source": [
"from tree_traverser.trie import Trie\n",
"\n",
"t = Trie()\n",
"i = t.insert(\"/data/prod_1/fdb/d1:\")\n",
"t"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c9842fb8-2bef-48fc-bd59-4526087c2cf7",
"metadata": {},
"outputs": [],
"source": [
"t.lookup_by_id(i)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e4ef453-b20f-489f-9ef9-d197626fc71e",
"metadata": {},
"outputs": [],
"source": [
"# Path is relative to the notebook folder instead of one user's home directory.\n",
"# NOTE(review): assumes the kernel's working directory is <repo>/notebooks - confirm.\n",
"data_path = Path(\"../config/climate-dt/new_format.json\")\n",
"with data_path.open(\"r\") as f:\n",
"    climate_dt = Tree.from_json(json.loads(f.read()))\n",
"\n",
"# climate_dt = climate_dt.guess_datatypes()\n",
"\n",
"# Every filter below is commented out, so select() receives an empty request.\n",
"filtered = climate_dt.select({\n",
"    # \"activity\": \"scenariomip\",\n",
"    # \"date\": \"20201103\",\n",
"    # \"model\": \"ifs-nemo\",\n",
"    # \"levtype\": \"sfc\",\n",
"    # \"param\": \"129\",\n",
"    # \"stream\": \"clte\",\n",
"})\n",
"\n",
"filtered.html(depth = 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "684c2520-bcb6-4e1c-acb3-e116a742f3d1",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "61a54328-e64c-4f39-9123-3d9890316678",
"metadata": {},
"source": [
"Sketch of the intended request flow. This is pseudo-code, not a runnable cell: `...` marks elided details, and `polytope` and `send_to_gribjump` are not defined anywhere in this notebook.\n",
"\n",
"```python\n",
"user_request = {\n",
"    \"activity\": \"scenariomip\",\n",
"    \"datetime\": {\"from\": \"2020-11-03:09:00\", \"to\": \"2021-11-03:06:00\"},\n",
"    \"model\": \"ifs-nemo\",\n",
"    \"levtype\": \"sfc\",\n",
"    ...\n",
"}\n",
"\n",
"# Load from disk, from the api, from somewhere\n",
"# Has information about grids and FDB URIs\n",
"climate_dt_tree = Tree.load(...) \n",
"\n",
"# Do some preliminary filtering on the tree, eliminating \n",
"# If the FDB URI info is too heavy we could choose to only load it after filtering\n",
"filtered_tree = climate_dt_tree.select(user_request)\n",
"\n",
"# Polytope takes the user request and the now filtered tree\n",
"# uses the grid information and other info to decide what indices it wants from grib jump for each leaf of the tree\n",
"# encodes this using the payload mechanism of the tree\n",
"index_tree_for_gribjump = polytope.do(user_request, filtered_tree)\n",
"\n",
"# This tree has the data associated with each leaf attached to it\n",
"data_tree = send_to_gribjump(index_tree_for_gribjump)\n",
"\n",
"output = polytope.process(data_tree)\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20b05adb-b760-4e6e-9ccf-8e2531cd8d5d",
"metadata": {},
"outputs": [],
"source": [
"filtered.print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b769f3d7-d0c3-42da-8b6d-64f00bc63dd9",
"metadata": {},
"outputs": [],
"source": [
"import unicodedata\n",
"\n",
"with open(\"/Users/math/Downloads/cads-forms-reanalysis-prod@da54febf9c6/reanalysis-era5-single-levels/gecko-config/mars.list\", \"r\") as f:\n",
" levels = []\n",
" for i, l in enumerate(f.readlines()):\n",
" level = len(l)-len(l.lstrip(' '))\n",
" l = l.lstrip().rstrip()\n",
" if level == 2:\n",
" assert len(levels) == 2\n",
" kvs = [kv for lv in levels + [l] for kv in lv.split(\",\")]\n",
" d = {}\n",
" for kv in kvs:\n",
" key, values = kv.split(\"=\")\n",
" values = values.split(\"/\")\n",
" assert key not in d\n",
" d[key] = values\n",
" print(d)\n",
" else:\n",
" levels = levels[:level]\n",
" levels.append(l)\n",
" if i > 10: break"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d62dfb70-6fd8-4096-9c01-2210bb010197",
"metadata": {},
"outputs": [],
"source": [
"r = dict(a = 1, b = 2, c = 3)\n",
"while r:\n",
" print(r.popitem())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60929956-bf0b-4f2d-95a4-a6b9f1708e73",
"metadata": {},
"outputs": [],
"source": [
"s = \"{class=d1,dataset=climate-dt,activity=scenariomip,experiment=ssp3-7.0,generation=1,model=icon,realization=1,expver=0001,stream=clte,date=20241102}{resolution=high,type=fc,levtype=sfc}{time=0000,param=168}\"\n",
"\n",
"dict(l.split(\"=\", maxsplit=1) for l in s.replace(\"{\", \" \").replace(\"}\", \" \").replace(\",\", \" \").split())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "152612d2-14f3-4639-8bc8-e4de7fad4ef4",
"metadata": {},
"outputs": [],
"source": [
"str.split?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8b21a006-37ef-49ac-81ef-97cc9e70c63e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:micromamba-catalogs]",
"language": "python",
"name": "conda-env-micromamba-catalogs-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}