{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "d4ca1d75-6dec-48d3-a448-d46bb0d65602", "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 6, "id": "d9966f80-7bd3-4404-920e-c8262f304a02", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", "\n", "
root
├── class=od
│ ├── expver=0001/0002/0003│ │ ├── param=1│ │ └── param=2
│ └── expver=0003/0004│ ├── param=1│ └── param=2
└── class=rd
├── expver=0001/0002 │ ├── param=1 │ └── param=2
└── expver=0003/0004 ├── param=1 └── param=2
" ], "text/plain": [ "Qube(data=NodeData(key='root', values=Enum(values=('root',)), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='class', values=Enum(values=('od',)), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='expver', values=Enum(values=('0001', '0002', '0003')), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='param', values=Enum(values=('1',)), metadata=frozendict.frozendict({})), children=()), Qube(data=NodeData(key='param', values=Enum(values=('2',)), metadata=frozendict.frozendict({})), children=()))), Qube(data=NodeData(key='expver', values=Enum(values=('0003', '0004')), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='param', values=Enum(values=('1',)), metadata=frozendict.frozendict({})), children=()), Qube(data=NodeData(key='param', values=Enum(values=('2',)), metadata=frozendict.frozendict({})), children=()))))), Qube(data=NodeData(key='class', values=Enum(values=('rd',)), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='expver', values=Enum(values=('0001', '0002')), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='param', values=Enum(values=('1',)), metadata=frozendict.frozendict({})), children=()), Qube(data=NodeData(key='param', values=Enum(values=('2',)), metadata=frozendict.frozendict({})), children=()))), Qube(data=NodeData(key='expver', values=Enum(values=('0003', '0004')), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='param', values=Enum(values=('1',)), metadata=frozendict.frozendict({})), children=()), Qube(data=NodeData(key='param', values=Enum(values=('2',)), metadata=frozendict.frozendict({})), children=())))))))" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from qubed import Qube\n", "\n", "q = Qube.from_dict({\n", " \"class=od\" : {\n", " \"expver=0001/0002/0003\": {\"param=1\":{}, \"param=2\":{}},\n", " \"expver=0003/0004\": 
{\"param=1\":{}, \"param=2\":{}},\n", " },\n", " \"class=rd\" : {\n", " \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n", " \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n", " },\n", "})\n", "\n", "q" ] },
{ "cell_type": "code", "execution_count": 19, "id": "261f32c8-74c6-4cc9-9000-bf9bf9ff3456", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "root\n", "├── class=od\n", "│ ├── expver=0001\n", "│ │ ├── param=1\n", "│ │ └── param=2\n", "│ └── expver=0002\n", "│ ├── param=1\n", "│ └── param=2\n", "└── class=rd\n", " ├── expver=0001\n", " │ ├── param=1\n", " │ ├── param=2\n", " │ └── param=3\n", " └── expver=0002\n", " ├── param=1\n", " └── param=2\n", "\n" ] } ], "source": [
"from qubed import Qube\n",
"\n",
"# One node per expver value; class=rd / expver=0001 carries an extra param=3 leaf.\n",
"q = Qube.from_dict(\n",
"    {\n",
"        \"class=od\": {\n",
"            \"expver=0001\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0002\": {\"param=1\": {}, \"param=2\": {}},\n",
"        },\n",
"        \"class=rd\": {\n",
"            \"expver=0001\": {\"param=1\": {}, \"param=2\": {}, \"param=3\": {}},\n",
"            \"expver=0002\": {\"param=1\": {}, \"param=2\": {}},\n",
"        },\n",
"    }\n",
")\n",
"\n",
"q.print()" ] },
{ "cell_type": "code", "execution_count": 25, "id": "7d1b353c-44a6-45be-bd02-2116771ed84d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "root\n", "├── class=od, expver=0001/0002, param=1/2\n", "└── class=rd\n", " ├── expver=0001, param=1/2/3\n", " └── expver=0002, param=1/2\n", "\n" ] } ], "source": [
"# Print the compressed form of the tree built in the previous cell.\n",
"q.compress().print()" ] },
{ "cell_type": "code", "execution_count": null, "id": "73193a0d-2d0d-4d64-9f3c-fc5f5dfe7c5e", "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "\n", "from tree_traverser.DataCubeTree import Tree\n", "from tree_traverser.CompressedDataCubeTree import CompressedTree\n", "\n", "t1 = Tree.from_dict({\n", " \"class=od\" : {\n", " \"expver=0001/0002/0003\": {\"param=1\":{}, \"param=2\":{}},\n", " \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n", " },\n", " 
\"class=rd\" : {\n", " \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n", " \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n", " },\n", " # \"class=cd\" : {\n", " # \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n", " # \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n", " # \"expver=0005\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n", " # }\n", "})\n", "\n", "print(str(t1))\n", "\n", "ct = CompressedTree.from_tree(t1).compress()\n", "print(str(ct))" ] },
{ "cell_type": "code", "execution_count": null, "id": "c025d24e-e769-499e-90c3-efc836c4b399", "metadata": {}, "outputs": [], "source": [
"def tree_from_dict(d):\n",
"    \"\"\"Convert a flat ``{key: value-or-list}`` mapping into a single-branch nested dict.\n",
"\n",
"    Each key becomes one nesting level labelled ``key=v1/v2/...``, in the\n",
"    mapping's iteration order, and the innermost level maps to ``{}``. An\n",
"    empty mapping gives ``{}``. A value that is not a list or tuple is treated\n",
"    as a single value, and every value goes through ``str()`` before joining.\n",
"    ``d`` itself is left unmodified.\n",
"    \"\"\"\n",
"    nested = {}\n",
"    # Wrap from the last key outwards so the first key ends up outermost.\n",
"    for k, v in reversed(list(d.items())):\n",
"        if not isinstance(v, (list, tuple)):\n",
"            v = [v]\n",
"        nested = {f\"{k}={'/'.join(map(str, v))}\": nested}\n",
"    return nested\n",
"\n",
"t1 = Tree.from_dict(tree_from_dict({\n",
"    'class': ['d1', 'd2'],\n",
"    'dataset': 'climate-dt',\n",
"    'generation': ['1', '2', '3'],\n",
"    'model': 'icon',\n",
"    'date': ['20241102', '20241103'],\n",
"    'resolution': ['high', 'low'],\n",
"    'time': ['0000', '0600', '1200', '1800'],\n",
"}))\n",
"print(t1)" ] },
{ "cell_type": "code", "execution_count": null, "id": "c85ba0e3-6754-4227-8dd5-ccca57714420", "metadata": {}, "outputs": [], "source": [
"t1 = Tree.from_dict({\n",
"    \"class=od\" : {\n",
"        \"expver=0001\": {\"param=1\":{}, \"param=2\":{}},\n",
"        \"expver=0002\": {\"param=1\":{}, \"param=2\":{}},\n",
"    },\n",
"    \"class=rd\" : {\n",
"        \"expver=0001\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
"        \"expver=0002\": {\"param=1\":{}, \"param=2\":{}},\n",
"    },\n",
"})\n",
"\n",
"t1.print()" ] },
{ "cell_type": "code", "execution_count": null, "id": "17145fb0-82ec-4716-9eca-83fdae4fbcdf", "metadata": {}, "outputs": [], "source": [ "ct = CompressedTree.from_tree(t1).compress()\n", "ct.print()" ] },
{ "cell_type": "code", "execution_count": null, "id": "f70e9e53-326a-4158-bf62-339fd765e5f8", "metadata": {}, "outputs": [], "source": [ 
"from pathlib import Path\n",
"\n",
"import orjson as json\n",
"from tree_traverser.DataCubeTree import Tree\n",
"# from tree_traverser.CompressedDataCubeTree import CompressedTree\n",
"\n",
"# Three classes; class=cd has an extra expver=0005 branch with a param=3 leaf.\n",
"t1 = Tree.from_dict(\n",
"    {\n",
"        \"class=od\": {\n",
"            \"expver=0001/0002\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0003/0004\": {\"param=1\": {}, \"param=2\": {}},\n",
"        },\n",
"        \"class=rd\": {\n",
"            \"expver=0001/0002\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0003/0004\": {\"param=1\": {}, \"param=2\": {}},\n",
"        },\n",
"        \"class=cd\": {\n",
"            \"expver=0001/0002\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0003/0004\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0005\": {\"param=1\": {}, \"param=2\": {}, \"param=3\": {}},\n",
"        },\n",
"    }\n",
")\n",
"\n",
"t1.html(depth=5)" ] },
{ "cell_type": "code", "execution_count": null, "id": "4f831008-7e36-4c51-9cf8-b10af69b35c0", "metadata": {}, "outputs": [], "source": [
"from pathlib import Path\n",
"\n",
"import orjson as json\n",
"from tree_traverser.DataCubeTree import Tree\n",
"from tree_traverser.CompressedDataCubeTree import CompressedTree\n",
"\n",
"# Same three-class tree as the previous cell, with CompressedTree imported as well.\n",
"t1 = Tree.from_dict(\n",
"    {\n",
"        \"class=od\": {\n",
"            \"expver=0001/0002\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0003/0004\": {\"param=1\": {}, \"param=2\": {}},\n",
"        },\n",
"        \"class=rd\": {\n",
"            \"expver=0001/0002\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0003/0004\": {\"param=1\": {}, \"param=2\": {}},\n",
"        },\n",
"        \"class=cd\": {\n",
"            \"expver=0001/0002\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0003/0004\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0005\": {\"param=1\": {}, \"param=2\": {}, \"param=3\": {}},\n",
"        },\n",
"    }\n",
")\n",
"\n",
"t1.html(depth=5)" ] },
{ "cell_type": "code", "execution_count": null, "id": "49d0f344-34ad-476b-bb27-7441b9d8cddb", "metadata": {}, "outputs": [], "source": [
"# Start from an empty tree and insert a first block of values.\n",
"t1 = Tree.empty().insert({\"a\": [1, 2, 3], \"b\": [2, 3, 4]})\n",
"t1" ] },
{ "cell_type": "code", "execution_count": 
null, "id": "cdab9360-f465-4570-9670-29a9567dc962", "metadata": {}, "outputs": [], "source": [
"t1 = t1.insert({\"a\": [4, 5, 6], \"b\": [2, 3, 4]})\n",
"t1" ] },
{ "cell_type": "code", "execution_count": null, "id": "4189ea19-793d-46e0-bbd6-550a14c9626c", "metadata": {}, "outputs": [], "source": [
"t1 = t1.insert({\"a\": [1], \"b\": [5]})\n",
"t1" ] },
{ "cell_type": "code", "execution_count": null, "id": "d1564a02-9d47-43af-990d-54493a76e029", "metadata": {}, "outputs": [], "source": [
"ct = CompressedTree.from_tree(t1).compress()\n",
"ct.html(debug=False)" ] },
{ "cell_type": "code", "execution_count": null, "id": "f50d6f52-b417-4dae-8b94-a4fe5535d0ca", "metadata": {}, "outputs": [], "source": [
"# Three classes; class=cd has an extra expver=0005 branch with a param=3 leaf.\n",
"t1 = Tree.from_dict(\n",
"    {\n",
"        \"class=od\": {\n",
"            \"expver=0001/0002\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0003/0004\": {\"param=1\": {}, \"param=2\": {}},\n",
"        },\n",
"        \"class=rd\": {\n",
"            \"expver=0001/0002\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0003/0004\": {\"param=1\": {}, \"param=2\": {}},\n",
"        },\n",
"        \"class=cd\": {\n",
"            \"expver=0001/0002\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0003/0004\": {\"param=1\": {}, \"param=2\": {}},\n",
"            \"expver=0005\": {\"param=1\": {}, \"param=2\": {}, \"param=3\": {}},\n",
"        },\n",
"    }\n",
")\n",
"\n",
"t1.html(depth=5)" ] },
{ "cell_type": "code", "execution_count": null, "id": "af63c581-aa28-4f35-be7f-0044ccdce671", "metadata": {}, "outputs": [], "source": [
"ct = CompressedTree.from_tree(t1).compress()\n",
"ct.html(debug=False)" ] },
{ "cell_type": "code", "execution_count": null, "id": "15ec656b-d7fa-45f9-97a4-0615ee8104ee", "metadata": {}, "outputs": [], "source": [
"ct = CompressedTree.from_tree(t1).compress()\n",
"print(ct)" ] },
{ "cell_type": "code", "execution_count": null, "id": "eeb388cd-d9e4-4959-97fa-c8874abf68d9", "metadata": {}, "outputs": [], "source": [
"ct.lookup({\"class\": \"rd\"})" ] },
{ "cell_type": "code", "execution_count": null, "id": 
"52a6a4de-ca76-48a0-8931-ea3f0569d3a4", "metadata": {}, "outputs": [], "source": [
"# Print every cache entry's summary, in sorted key order.\n",
"for k in sorted(ct.cache):\n",
"    node = ct.cache[k]\n",
"    print(k, node.summary())" ] },
{ "cell_type": "code", "execution_count": null, "id": "ea5ee804-740b-435d-9455-b22ac6ba07db", "metadata": {}, "outputs": [], "source": [
"# NOTE(review): the quote swap assumes each line is a dict written with single\n",
"# quotes; it breaks on values that contain an apostrophe. Confirm the file format.\n",
"with open(\"keys.txt\", \"r\") as f:\n",
"    # Only the first record is inspected, so iterate lazily instead of reading every line.\n",
"    for line in f:\n",
"        print(line)\n",
"        j = json.loads(line.replace(\"'\", '\"'))\n",
"        path = j[\"path\"]\n",
"        keys = j[\"keys\"]\n",
"        offset = j[\"offset\"]\n",
"\n",
"        print(path, keys)\n",
"        break" ] },
{ "cell_type": "code", "execution_count": null, "id": "cf2d2099-98d4-4e41-9fb9-1f2f4cbe3bd9", "metadata": {}, "outputs": [], "source": [ "ct[\"class\", \"od\"][\"expver\", \"0001\"].root._children, ct[\"class\", \"od\"][\"expver\", \"0003\"].root._children, " ] },
{ "cell_type": "code", "execution_count": null, "id": "9cb2619b-a0aa-4a1e-970b-04bf5da8a784", "metadata": {}, "outputs": [], "source": [ "from tree_traverser.trie import Trie\n", "\n", "t = Trie()\n", "i = t.insert(\"/data/prod_1/fdb/d1:\")\n", "t" ] },
{ "cell_type": "code", "execution_count": null, "id": "c9842fb8-2bef-48fc-bd59-4526087c2cf7", "metadata": {}, "outputs": [], "source": [ "t.lookup_by_id(i)" ] },
{ "cell_type": "code", "execution_count": null, "id": "6e4ef453-b20f-489f-9ef9-d197626fc71e", "metadata": {}, "outputs": [], "source": [
"# Resolved under the current user's home directory instead of a hard-coded /Users/math prefix.\n",
"data_path = Path.home() / \"git/rust/qubed/config/climate-dt/new_format.json\"\n",
"with data_path.open(\"r\") as f:\n",
"    climate_dt = Tree.from_json(json.loads(f.read()))\n",
"\n",
"# climate_dt = climate_dt.guess_datatypes()\n",
"\n",
"# Every filter is commented out, so select() receives an empty request.\n",
"filtered = climate_dt.select({\n",
"    # \"activity\": \"scenariomip\",\n",
"    # \"date\": \"20201103\",\n",
"    # \"model\": \"ifs-nemo\",\n",
"    # \"levtype\": \"sfc\",\n",
"    # \"param\": \"129\",\n",
"    # \"stream\": \"clte\",\n",
"})\n",
"\n",
"filtered.html(depth = 1)" ] },
{ "cell_type": "code", "execution_count": null, "id": 
"684c2520-bcb6-4e1c-acb3-e116a742f3d1", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "id": "61a54328-e64c-4f39-9123-3d9890316678", "metadata": {}, "source": [
"Sketch of the intended request flow. This is pseudo-code, not a runnable cell: `polytope` and `send_to_gribjump` are not defined anywhere in this notebook, and the `...` entries are elisions.\n",
"\n",
"```python\n",
"user_request = {\n",
"    \"activity\": \"scenariomip\",\n",
"    \"datetime\": {\"from\": \"2020-11-03:09:00\", \"to\": \"2021-11-03:06:00\"},\n",
"    \"model\": \"ifs-nemo\",\n",
"    \"levtype\": \"sfc\",\n",
"    ...\n",
"}\n",
"\n",
"# Load from disk, from the api, from somewhere\n",
"# Has information about grids and FDB URIs\n",
"climate_dt_tree = Tree.load(...)\n",
"\n",
"# Do some preliminary filtering on the tree, eliminating\n",
"# If the FDB URI info is too heavy we could choose to only load it after filtering\n",
"filtered_tree = climate_dt_tree.select(user_request)\n",
"\n",
"# Polytope takes the user request and the now filtered tree\n",
"# uses the grid information and other info to decide what indices it wants from grib jump for each leaf of the tree\n",
"# encodes this using the payload mechanism of the tree\n",
"index_tree_for_gribjump = polytope.do(user_request, filtered_tree)\n",
"\n",
"# This tree has the data associated with each leaf attached to it\n",
"data_tree = send_to_gribjump(index_tree_for_gribjump)\n",
"\n",
"output = polytope.process(data_tree)\n",
"```" ] },
{ "cell_type": "code", "execution_count": null, "id": "20b05adb-b760-4e6e-9ccf-8e2531cd8d5d", "metadata": {}, "outputs": [], "source": [ "filtered.print()" ] },
{ "cell_type": "code", "execution_count": null, "id": "b769f3d7-d0c3-42da-8b6d-64f00bc63dd9", "metadata": {}, "outputs": [], "source": [
"import unicodedata\n",
"from pathlib import Path\n",
"\n",
"# Resolved under the current user's home directory instead of a hard-coded /Users/math prefix.\n",
"MARS_LIST = Path.home() / \"Downloads/cads-forms-reanalysis-prod@da54febf9c6/reanalysis-era5-single-levels/gecko-config/mars.list\"\n",
"MAX_LINES = 12  # preview only: stop after this many lines of the file\n",
"\n",
"with open(MARS_LIST, \"r\") as f:\n",
"    levels = []  # stripped text of the enclosing, less-indented lines\n",
"    for i, raw in enumerate(f):\n",
"        if i >= MAX_LINES:\n",
"            break\n",
"        # Nesting depth is the number of leading spaces.\n",
"        level = len(raw) - len(raw.lstrip(\" \"))\n",
"        text = raw.strip()\n",
"        if not text:\n",
"            continue  # a blank line would otherwise reset `levels` to [\"\"]\n",
"        if level == 2:\n",
"            assert len(levels) == 2\n",
"            # Merge the key=value pairs of both parent lines with this leaf line.\n",
"            kvs = [kv for lv in levels + [text] for kv in lv.split(\",\")]\n",
"            d = {}\n",
"            for kv in kvs:\n",
"                # Split on the first '=' only, so an '=' inside a value cannot break the unpacking.\n",
"                key, values = kv.split(\"=\", maxsplit=1)\n",
"                values = values.split(\"/\")\n",
"                assert key not in d\n",
"                d[key] = values\n",
"            print(d)\n",
"        else:\n",
"            levels = levels[:level]\n",
"            levels.append(text)" ] },
{ "cell_type": "code", "execution_count": null, "id": "d62dfb70-6fd8-4096-9c01-2210bb010197", "metadata": {}, "outputs": [], "source": [
"r = dict(a=1, b=2, c=3)\n",
"while r:\n",
"    print(r.popitem())" ] },
{ "cell_type": "code", "execution_count": null, "id": "60929956-bf0b-4f2d-95a4-a6b9f1708e73", "metadata": {}, "outputs": [], "source": [
"s = \"{class=d1,dataset=climate-dt,activity=scenariomip,experiment=ssp3-7.0,generation=1,model=icon,realization=1,expver=0001,stream=clte,date=20241102}{resolution=high,type=fc,levtype=sfc}{time=0000,param=168}\"\n",
"\n",
"# Treat braces and commas as separators, then split each key=value pair on its first '='.\n",
"dict(pair.split(\"=\", maxsplit=1) for pair in s.replace(\"{\", \" \").replace(\"}\", \" \").replace(\",\", \" \").split())" ] },
{ "cell_type": "code", "execution_count": null, "id": "8b21a006-37ef-49ac-81ef-97cc9e70c63e", "metadata": {}, "outputs": [], "source": [] } ],
"metadata": { "kernelspec": { "display_name": "Python [conda env:micromamba-catalogs]", "language": "python", "name": "conda-env-micromamba-catalogs-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.0" } }, "nbformat": 4, "nbformat_minor": 5 }