diff --git a/README.md b/README.md index b770b6d..801a3a8 100644 --- a/README.md +++ b/README.md @@ -1,32 +1,21 @@ -![Static Badge](https://img.shields.io/badge/ESEE-Production_Chain-blue?style=flat&label=ESEE&link=github.com%2Fecmwf) -![Static Badge](https://img.shields.io/badge/ESEE-Data_Provision-purple?style=flat&label=ESEE&link=github.com%2Fecmwf) -![Static Badge](https://img.shields.io/badge/ESEE-User_Interaction-green?style=flat&label=ESEE&link=github.com%2Fecmwf) -![Static Badge](https://img.shields.io/badge/ESEE-Foundation-orange?style=flat&label=ESEE&link=github.com%2Fecmwf) - - # Q3 Quick Querying of Qubes This repostitory contains a collection of components designed to deliver user friendly cataloging for ecmwf's data. The STAC Server, Frontend and a periodic job to do tree compression can be deployed together to kubernetes using the [helm chart](./helm_chart). Thise deployment can then be accessed either via the Query Builder Web interface or the python client. ## 📦 Components Overview -### 🌲 [Tree Compressor](./tree_compresser) -> **Python/Rust Package** -📋 Lists the datasets in an **FDB** and converts the output into a **compressed tree representation** for fast querying. - ---- - -### 🚀 [STAC Server](./stac_server) +### 🚀 [Qubed STAC Server](./stac_server) > **FastAPI STAC Server Backend** - 🌟 Implements our proposed [Datacube STAC Extension](./structured_stac.md). - 🛠️ Allows efficient traversal of ECMWF's datacubes. -- 🔗 **[Live Example](http://catalogue.lumi.apps.dte.destination-earth.eu/stac?class=d1&dataset=extremes-dt&expver=0001&stream=oper)**. +- Part of the implementation of this is [🌲 Tree Compressor](./tree_compresser), a **compressed tree representation** optimised for storing trees with many duplicated subtrees. +- 🔗 **[Live Example]()**. --- -### 🌐 [Web Query Builder](./web_query_builder) +### 🌐 [Qubed Web Query Builder](./web_query_builder) > **Web Frontend** - 👀 Displays data from the **STAC Server** in an intuitive user interface. 
@@ -34,7 +23,7 @@ This repostitory contains a collection of components designed to deliver user fr --- -### TODO: 🐍 [Python Query Builder](./python_query_builder) +### TODO: 🐍 [Qubed Python Query Builder](./python_query_builder) > **Python Client** - 🤖 A Python client for the **STAC Server**. diff --git a/tree_compresser/.gitignore b/tree_compresser/.gitignore new file mode 100644 index 0000000..c8f0442 --- /dev/null +++ b/tree_compresser/.gitignore @@ -0,0 +1,72 @@ +/target + +# Byte-compiled / optimized / DLL files +__pycache__/ +.pytest_cache/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +.venv/ +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +include/ +man/ +venv/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +.DS_Store + +# Sphinx documentation +docs/_build/ + +# PyCharm +.idea/ + +# VSCode +.vscode/ + +# Pyenv +.python-version diff --git a/tree_compresser/Cargo.lock b/tree_compresser/Cargo.lock new file mode 100644 index 0000000..8c23cac --- /dev/null +++ b/tree_compresser/Cargo.lock @@ -0,0 +1,589 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +dependencies = [ + "anstyle", + "windows-sys 0.59.0", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "cc" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" 
+version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb3b4b9e5a7c7514dfa52869339ee98b3156b0bfb4e8a77c4ff4babb64b1604f" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b17a95aa67cc7b5ebd32aa5370189aa0d79069ef1c64ce893bd30fb24bff20ec" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7" + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "env_logger" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" +dependencies = [ + "humantime", + "is-terminal", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "indoc" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + +[[package]] +name = "is-terminal" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itoa" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "540654e97a3f4470a492cd30ff187bc95d89557a903a2bbf112e2fae98104ef2" + +[[package]] +name = "libc" +version = "0.2.164" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" + +[[package]] +name = "libloading" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "351a32417a12d5f7e82c368a66781e307834dae04c6ce0cd4456d52989229883" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = 
"memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "portable-atomic" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" + +[[package]] +name = "proc-macro2" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ebb0c0cc0de9678e53be9ccf8a2ab53045e6e3a8be03393ceccc5e7396ccb40" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80e3ce69c4ec34476534b490e412b871ba03a82e35604c3dfb95fcb6bfb60c09" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b09f311c76b36dfd6dd6f7fa6f9f18e7e46a1c937110d283e80b12ba2468a75" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd4f74086536d1e1deaff99ec0387481fb3325c82e4e48be0e75ab3d3fcb487a" +dependencies = [ + "proc-macro2", + 
"pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e77dfeb76b32bbf069144a5ea0a36176ab59c8db9ce28732d0f06f096bbfbc8" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "qubed_tree" +version = "0.1.0" +dependencies = [ + "pyo3", + "rsfdb", + "serde", + "serde_json", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rsfdb" +version = "0.1.0" +source = "git+https://github.com/ecmwf/rsfdb?branch=develop#ab8c9590bba15d22167c274db9238cd9b897baf1" +dependencies = [ + "libc", + "libloading", + "once_cell", + "rsfindlibs", + "serde", + "serde_json", +] + +[[package]] +name = "rsfindlibs" +version = "0.1.1" +source = "git+https://github.com/ecmwf-projects/rsfindlibs.git#1358b1049bf3e0b581badfc8005a9828a542cdaa" +dependencies = [ + "cc", + "clap", + "env_logger", + "libloading", + "log", +] + +[[package]] +name = "ryu" 
+version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "serde" +version = "1.0.215" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.215" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.133" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" 
+dependencies = [ + "winapi-util", +] + +[[package]] +name = "unicode-ident" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" + +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/tree_compresser/Cargo.toml b/tree_compresser/Cargo.toml new file mode 100644 index 0000000..9b1793c --- /dev/null +++ b/tree_compresser/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "qubed_tree" +version = "0.1.0" +edition = "2021" + +[dependencies] +rsfdb = {git = "https://github.com/ecmwf/rsfdb", branch = "develop"} +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +pyo3 = "0.23.1" + + +[lib] +name = "tree_traverser" +crate-type = ["cdylib"] +path = "./rust_src/lib.rs" + diff --git a/tree_compresser/README.md b/tree_compresser/README.md new file mode 100644 index 0000000..e69de29 diff --git a/tree_compresser/pyproject.toml b/tree_compresser/pyproject.toml index 391b59f..840ac4e 100644 --- a/tree_compresser/pyproject.toml +++ b/tree_compresser/pyproject.toml @@ -1,13 +1,27 @@ [build-system] -requires = ["setuptools >= 61.0"] -build-backend = "setuptools.build_meta" +requires = ["maturin>=1.7,<2.0"] +build-backend = "maturin" [project] -name = "TreeTraverser" +name = "tree_traverser" description = "Tools to work with compressed Datacubes and Trees" +readme = "README.md" +authors = [ + {name = "Tom Hodson", email = "thomas.hodson@ecmwf.int"}, +] +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +requires-python = ">= 3.11" dynamic = ["version"] dependencies = [ - "fastapi", "pe" ] +[tool.maturin] +python-source = "python_src" +module-name = "tree_traverser.rust" +features = ["pyo3/extension-module"] + diff --git a/tree_compresser/src/TreeTraverser/CompressedTree.py b/tree_compresser/python_src/tree_traverser/CompressedTree.py 
similarity index 100%
rename from tree_compresser/src/TreeTraverser/CompressedTree.py
rename to tree_compresser/python_src/tree_traverser/CompressedTree.py
diff --git a/tree_compresser/python_src/tree_traverser/__init__.py b/tree_compresser/python_src/tree_traverser/__init__.py
new file mode 100644
index 0000000..d1138ce
--- /dev/null
+++ b/tree_compresser/python_src/tree_traverser/__init__.py
@@ -0,0 +1 @@
+from . import rust as backend
\ No newline at end of file
diff --git a/tree_compresser/src/TreeTraverser/fdb_schema/__init__.py b/tree_compresser/python_src/tree_traverser/fdb_schema/__init__.py
similarity index 100%
rename from tree_compresser/src/TreeTraverser/fdb_schema/__init__.py
rename to tree_compresser/python_src/tree_traverser/fdb_schema/__init__.py
diff --git a/tree_compresser/src/TreeTraverser/fdb_schema/fdb_schema_parser.py b/tree_compresser/python_src/tree_traverser/fdb_schema/fdb_schema_parser.py
similarity index 100%
rename from tree_compresser/src/TreeTraverser/fdb_schema/fdb_schema_parser.py
rename to tree_compresser/python_src/tree_traverser/fdb_schema/fdb_schema_parser.py
diff --git a/tree_compresser/src/TreeTraverser/fdb_schema/fdb_types.py b/tree_compresser/python_src/tree_traverser/fdb_schema/fdb_types.py
similarity index 100%
rename from tree_compresser/src/TreeTraverser/fdb_schema/fdb_types.py
rename to tree_compresser/python_src/tree_traverser/fdb_schema/fdb_types.py
diff --git a/tree_compresser/rust_src/lib.rs b/tree_compresser/rust_src/lib.rs
new file mode 100644
index 0000000..f74e242
--- /dev/null
+++ b/tree_compresser/rust_src/lib.rs
@@ -0,0 +1,107 @@
+// NOTE(review): crate-wide lint allowances, presumably kept while the JSON
+// export at the end of `traverse_fdb` is disabled; narrow or drop them later.
+#![allow(unused_imports)]
+#![allow(dead_code)]
+#![allow(unused_variables)]
+
+mod tree;
+
+use rsfdb::listiterator::KeyValueLevel;
+use rsfdb::request::Request;
+use rsfdb::FDB; // Make sure the `fdb` crate is correctly specified in the dependencies
+
+use serde_json::{json, Value};
+use std::time::Instant;
+
+use pyo3::exceptions::PyRuntimeError;
+use pyo3::prelude::*;
+use pyo3::types::{PyDict, PyList, PyString};
+
+use crate::tree::TreeNode;
+use std::collections::HashMap;
+
+/// Lists the FDB entries matching `request` and prints them as a tree.
+///
+/// `request` maps each key name to its list of values; it is converted to JSON
+/// and passed to `Request::from_json`. `fdb_config` is forwarded to `FDB::new`.
+///
+/// The key path of every listed item that has one is inserted below a
+/// synthetic `root=root` node, and the resulting tree is printed to stdout as
+/// one `key=value` line per node. The returned string is currently the
+/// placeholder `"test"`.
+///
+/// # Errors
+///
+/// Returns a Python `RuntimeError` instead of panicking when the FDB handle
+/// cannot be created, the request cannot be built, or the listing fails.
+#[pyfunction]
+#[pyo3(signature = (request, fdb_config = None))]
+fn traverse_fdb(
+    request: HashMap<String, Vec<String>>,
+    fdb_config: Option<&str>,
+) -> PyResult<String> {
+    let start_time = Instant::now();
+    let fdb = FDB::new(fdb_config)
+        .map_err(|e| PyRuntimeError::new_err(format!("Failed to open FDB: {e:?}")))?;
+
+    let list_request = Request::from_json(json!(request)).map_err(|e| {
+        PyRuntimeError::new_err(format!("Failed to create request from python dict: {e:?}"))
+    })?;
+
+    let list = fdb
+        .list(&list_request, true, true)
+        .map_err(|e| PyRuntimeError::new_err(format!("Failed to list FDB contents: {e:?}")))?;
+
+    let mut root = TreeNode::new(KeyValueLevel {
+        key: "root".to_string(),
+        value: "root".to_string(),
+        level: 0,
+    });
+
+    for item in list {
+        if let Some(path) = &item.request {
+            root.insert(path);
+        }
+    }
+
+    // Traverse and print the tree
+    root.traverse(0, &|node: &TreeNode, level: usize| {
+        let indent = " ".repeat(level);
+        println!("{}{}={}", indent, node.key.key, node.key.value);
+    });
+
+    // Convert the tree to JSON
+    // let json_output = root.to_json();
+
+    // // Print the JSON output
+    // // println!("{}", serde_json::to_string_pretty(&json_output).unwrap());
+    // std::fs::write(
+    //     "output.json",
+    //     serde_json::to_string_pretty(&json_output).unwrap(),
+    // )
+    // .expect("Unable to write file");
+
+    // let duration = start_time.elapsed();
+    // println!("Total runtime: {:?}", duration);
+
+    Ok(("test").to_string())
+}
+
+/// Returns `a + b + 2` formatted as a decimal string.
+///
+/// NOTE(review): the extra `+ 2` does not match the function name, and the
+/// function is not registered in the `rust` module below; confirm intent.
+#[pyfunction]
+fn sum_as_string(a: usize, b: usize) -> PyResult<String> {
+    Ok((a + b + 2).to_string())
+}
+
+/// A Python module implemented in Rust exposing `traverse_fdb`.
+///
+/// NOTE(review): the function name `rust` presumably has to match the last
+/// component of maturin's `module-name` (`tree_traverser.rust`) rather than
+/// `lib.name`, otherwise Python cannot import the module; confirm.
+#[pymodule]
+fn rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_function(wrap_pyfunction!(traverse_fdb, m)?)
+}
diff --git a/tree_compresser/rust_src/tree.rs b/tree_compresser/rust_src/tree.rs
new file mode 100644
index 0000000..9ab2488
--- /dev/null
+++ b/tree_compresser/rust_src/tree.rs
@@ -0,0 +1,84 @@
+use rsfdb::listiterator::KeyValueLevel;
+use serde_json::Value;
+
+/// A node of the key/value tree built from FDB listing paths.
+#[derive(Debug)]
+pub struct TreeNode {
+    /// Key, value and level identifying this node.
+    pub(crate) key: KeyValueLevel,
+    /// Child nodes, in first-insertion order.
+    pub(crate) children: Vec<TreeNode>,
+}
+
+impl TreeNode {
+    /// Creates a node for `key` with no children.
+    pub fn new(key: KeyValueLevel) -> Self {
+        TreeNode {
+            key,
+            children: Vec::new(),
+        }
+    }
+
+    /// Inserts `path` below this node.
+    ///
+    /// The first element is matched against the existing children; a child
+    /// with an equal key is reused, otherwise a new one is appended. The rest
+    /// of the path is then inserted recursively. An empty path does nothing.
+    pub fn insert(&mut self, path: &[KeyValueLevel]) {
+        let (kvl, rest) = match path.split_first() {
+            Some(parts) => parts,
+            None => return,
+        };
+
+        // Reuse a child whose key equals the next path element, if any
+        if let Some(child) = self.children.iter_mut().find(|child| child.key == *kvl) {
+            // Insert the remaining path into the existing child
+            child.insert(rest);
+        } else {
+            // Create a new child node
+            let mut new_child = TreeNode::new(kvl.clone());
+            new_child.insert(rest);
+            self.children.push(new_child);
+        }
+    }
+
+    /// Calls `callback(node, depth)` for this node and then, depth-first, for
+    /// every descendant; `level` is the depth reported for this node.
+    pub fn traverse<F>(&self, level: usize, callback: &F)
+    where
+        F: Fn(&TreeNode, usize),
+    {
+        callback(self, level);
+        for child in &self.children {
+            child.traverse(level + 1, callback);
+        }
+    }
+
+    /// Serialises this subtree as `{"key=value": {...children...}}`.
+    ///
+    /// Every node appears exactly once, under its own `key=value` label, and
+    /// a node without children maps to an empty object.
+    pub fn to_json(&self) -> Value {
+        let mut object = serde_json::Map::new();
+        object.insert(self.label(), self.children_to_json());
+        Value::Object(object)
+    }
+
+    /// Formats this node's key as `key=value`.
+    fn label(&self) -> String {
+        format!("{}={}", self.key.key, self.key.value)
+    }
+
+    /// Builds the JSON object mapping each child's label to its own children.
+    ///
+    /// Kept separate from `to_json` so that a child's label is not emitted a
+    /// second time inside the child's value.
+    fn children_to_json(&self) -> Value {
+        Value::Object(
+            self.children
+                .iter()
+                .map(|child| (child.label(), child.children_to_json()))
+                .collect(),
+        )
+    }
+}
diff --git a/tree_compresser/tests/test.py b/tree_compresser/tests/test.py
new file mode 100644
index 0000000..63a693e
--- /dev/null
+++ b/tree_compresser/tests/test.py
@@ -0,0 +1,26 @@
+from tree_traverser import backend
+
+
+
+config = """
+---
+type: remote
+host: databridge-prod-catalogue1-ope.ewctest.link
+port: 10000
+engine: remote
+store: remote
+"""
+
+def massage_request(r):
+    return {k : v if isinstance(v, list) else [v]
+            for k, v in r.items()}
+
+request = {
+    "class": "d1",
+    "dataset": "extremes-dt",
+    "expver": "0001",
+    "stream": "oper",
+    "date": ["20241117", "20241116"],
+    }
+
+backend.traverse_fdb(massage_request(request), fdb_config = config)