reorganise

This commit is contained in:
Tom 2025-02-12 12:44:30 +00:00
parent 847bd0ab12
commit 7bafcda627
39 changed files with 747 additions and 2196 deletions

4
.gitignore vendored
View File

@ -6,4 +6,6 @@ config.yaml
raw_list
*.egg-info/
deps/
docs/_build/
docs/_build/
docs/jupyter_execute
target/

27
.readthedocs.yaml Normal file
View File

@ -0,0 +1,27 @@
# .readthedocs.yaml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Set the version of Python and other tools you might need
build:
os: ubuntu-20.04
tools:
python: "3.12"
# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/conf.py
# If using Sphinx, optionally build your docs in additional formats such as PDF
# formats:
# - pdf
python:
install:
- method: pip
path: .
extra_requirements:
- docs

View File

@ -47,17 +47,18 @@ version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
dependencies = [
"windows-sys 0.59.0",
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.6"
version = "3.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125"
checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e"
dependencies = [
"anstyle",
"windows-sys 0.59.0",
"once_cell",
"windows-sys",
]
[[package]]
@ -68,9 +69,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "cc"
version = "1.2.1"
version = "1.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47"
checksum = "c7777341816418c02e033934a09f20dc0ccaf65a5201ef8a450ae0105a573fda"
dependencies = [
"shlex",
]
@ -83,9 +84,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "4.5.21"
version = "4.5.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb3b4b9e5a7c7514dfa52869339ee98b3156b0bfb4e8a77c4ff4babb64b1604f"
checksum = "8acebd8ad879283633b343856142139f2da2317c96b05b4dd6181c61e2480184"
dependencies = [
"clap_builder",
"clap_derive",
@ -93,9 +94,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.5.21"
version = "4.5.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b17a95aa67cc7b5ebd32aa5370189aa0d79069ef1c64ce893bd30fb24bff20ec"
checksum = "f6ba32cbda51c7e1dfd49acc1457ba1a7dec5b64fe360e828acb13ca8dc9c2f9"
dependencies = [
"anstream",
"anstyle",
@ -105,9 +106,9 @@ dependencies = [
[[package]]
name = "clap_derive"
version = "4.5.18"
version = "4.5.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab"
checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed"
dependencies = [
"heck",
"proc-macro2",
@ -117,9 +118,9 @@ dependencies = [
[[package]]
name = "clap_lex"
version = "0.7.3"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7"
checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
[[package]]
name = "colorchoice"
@ -166,13 +167,13 @@ checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
[[package]]
name = "is-terminal"
version = "0.4.13"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b"
checksum = "e19b23d53f35ce9f56aebc7d1bb4e6ac1e9c0db7ac85c8d1760c04379edced37"
dependencies = [
"hermit-abi",
"libc",
"windows-sys 0.52.0",
"windows-sys",
]
[[package]]
@ -183,15 +184,15 @@ checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itoa"
version = "1.0.13"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "540654e97a3f4470a492cd30ff187bc95d89557a903a2bbf112e2fae98104ef2"
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
[[package]]
name = "libc"
version = "0.2.164"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "libloading"
@ -205,9 +206,9 @@ dependencies = [
[[package]]
name = "log"
version = "0.4.22"
version = "0.4.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
[[package]]
name = "memchr"
@ -226,30 +227,30 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.20.2"
version = "1.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e"
[[package]]
name = "portable-atomic"
version = "1.9.0"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2"
checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
[[package]]
name = "proc-macro2"
version = "1.0.92"
version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
dependencies = [
"unicode-ident",
]
[[package]]
name = "pyo3"
version = "0.23.1"
version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ebb0c0cc0de9678e53be9ccf8a2ab53045e6e3a8be03393ceccc5e7396ccb40"
checksum = "57fe09249128b3173d092de9523eaa75136bf7ba85e0d69eca241c7939c933cc"
dependencies = [
"cfg-if",
"indoc",
@ -265,9 +266,9 @@ dependencies = [
[[package]]
name = "pyo3-build-config"
version = "0.23.1"
version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80e3ce69c4ec34476534b490e412b871ba03a82e35604c3dfb95fcb6bfb60c09"
checksum = "1cd3927b5a78757a0d71aa9dff669f903b1eb64b54142a9bd9f757f8fde65fd7"
dependencies = [
"once_cell",
"target-lexicon",
@ -275,9 +276,9 @@ dependencies = [
[[package]]
name = "pyo3-ffi"
version = "0.23.1"
version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b09f311c76b36dfd6dd6f7fa6f9f18e7e46a1c937110d283e80b12ba2468a75"
checksum = "dab6bb2102bd8f991e7749f130a70d05dd557613e39ed2deeee8e9ca0c4d548d"
dependencies = [
"libc",
"pyo3-build-config",
@ -285,9 +286,9 @@ dependencies = [
[[package]]
name = "pyo3-macros"
version = "0.23.1"
version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd4f74086536d1e1deaff99ec0387481fb3325c82e4e48be0e75ab3d3fcb487a"
checksum = "91871864b353fd5ffcb3f91f2f703a22a9797c91b9ab497b1acac7b07ae509c7"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
@ -297,9 +298,9 @@ dependencies = [
[[package]]
name = "pyo3-macros-backend"
version = "0.23.1"
version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e77dfeb76b32bbf069144a5ea0a36176ab59c8db9ce28732d0f06f096bbfbc8"
checksum = "43abc3b80bc20f3facd86cd3c60beed58c3e2aa26213f3cda368de39c60a27e4"
dependencies = [
"heck",
"proc-macro2",
@ -309,8 +310,8 @@ dependencies = [
]
[[package]]
name = "qubed_tree"
version = "0.1.0"
name = "qubed"
version = "0.1.2"
dependencies = [
"pyo3",
"rsfdb",
@ -320,9 +321,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.37"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
"proc-macro2",
]
@ -381,24 +382,24 @@ dependencies = [
[[package]]
name = "ryu"
version = "1.0.18"
version = "1.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd"
[[package]]
name = "serde"
version = "1.0.215"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f"
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.215"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0"
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
dependencies = [
"proc-macro2",
"quote",
@ -407,9 +408,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.133"
version = "1.0.138"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377"
checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949"
dependencies = [
"itoa",
"memchr",
@ -431,9 +432,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.89"
version = "2.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e"
checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1"
dependencies = [
"proc-macro2",
"quote",
@ -457,9 +458,9 @@ dependencies = [
[[package]]
name = "unicode-ident"
version = "1.0.14"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
[[package]]
name = "unindent"
@ -495,7 +496,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.59.0",
"windows-sys",
]
[[package]]
@ -504,15 +505,6 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.59.0"

View File

@ -14,10 +14,10 @@ pyo3 = "0.23"
[lib]
name = "tree_traverser"
crate-type = ["cdylib"]
path = "./rust_src/lib.rs"
path = "./src/rust/lib.rs"
[patch.'https://github.com/ecmwf/rsfdb']
rsfdb = { path = "../../rsfdb" }
rsfdb = { path = "../rsfdb" }
[patch.'https://github.com/ecmwf-projects/rsfindlibs']
rsfindlibs = { path = "../../rsfindlibs" }
rsfindlibs = { path = "../rsfindlibs" }

View File

@ -1,11 +0,0 @@
[ class=od, stream, date, time
[ domain, type, levtype, dbase, rki, rty, ty
[ step, levelist?, param ]]
]
[ class=ensemble, number, stream, date, time,
[ domain, type, levtype, dbase, rki, rty, ty
[ step, levelist?, param ]]
]
[ class, foo]

File diff suppressed because it is too large

View File

@ -1,11 +0,0 @@
[ class=od, stream, date, time
[ domain, type, levtype, dbase, rki, rty, ty
[ step, levelist?, param ]]
]
[ class=ensemble, number, stream, date, time,
[ domain, type, levtype, dbase, rki, rty, ty
[ step, levelist?, param ]]
]
[ class, foo]

View File

@ -1,590 +0,0 @@
# * Format of the rules is:
# [a1, a2, a3 ...[b1, b2, b3... [c1, c2, c3...]]]
# - The first level (a) defines which attributes are used to name the top level directory
# - The second level (b) defines which attributes are used to name the data files
# - The third level (c) defines which attributes are used as index keys
# * Rules can be grouped
# [a1, a2, a3 ...
# [b1, b2, b3... [c1, c2, c3...]]
# [B1, B2, B3... [C1, C2, C3...]]
# ]
# * A list of values can be given for an attribute
# [ ..., stream=enfo/efov, ... ]
# This will be used when matching rules.
# * Attributes can be typed
# Globally, at the beginning of this file:
# refdate: Date;
# or in the context of a rule:
# [type=cl, ... [date:ClimateMonth, ...]]
# Typing attributes is done when the user's requests or the GRIB values need to be modified before directories, files and indexes are created. For example, ClimateMonth will transform 2010-04-01 to 'may' internally.
# * Attributes can be optional
# [ step, levelist?, param ]
# They will be replaced internally by an empty value. It is also possible to provide a default substitution value: e.g. [domain?g] will consider the domain to be 'g' if missing.
# * Attributes can be removed:
# [grid-]
# This is useful to remove attributes present in the GRIB that should not be ignored
# * Rules are matched:
# - If the attributes are present in the GRIB/Request, or marked optional or ignored
# - If a list of possible value is provided, one of them must match, for example
# [ class, expver, stream=enfo/efov, date, time, domain ]
# will match either stream=enfo or stream=efov, all other attributes will be matched if they exist in the GRIB or user's request
# * On archive:
# - Attributes are extracted from the GRIB (namespace 'mars'), possibly modified by the attribute type
# - Only the first rule is used, so order is important
# - All GRIB attributes must be used by the rules, otherwise an error is raised
# * On retrieve:
# - Attributes are extracted from the user's request, possibly modified by the attribute type (e.g. for handling of U/V)
# - All the matching rules are considered
# - Only attributes listed in the rules are used to extract values from the user's request
# Default types
param: Param;
step: Step;
date: Date;
hdate: Date;
refdate: Date;
latitude: Double;
longitude: Double;
levelist: Double;
grid: Grid;
expver: Expver;
time: Time;
fcmonth: Integer;
number: Integer;
frequency: Integer;
direction: Integer;
channel: Integer;
instrument: Integer;
ident: Integer;
diagnostic: Integer;
iteration: Integer;
system: Integer;
method: Integer;
# ???????
# reference: Integer;
# fcperiod: Integer;
# opttime: Integer;
# leadtime: Integer;
# quantile: ??????
# range: ??????
# band: Integer;
########################################################
# These rules must be first, otherwise fields of these
# classes will be indexed with the default rule for oper
[ class=ti/s2, expver, stream, date, time, model
[ origin, type, levtype, hdate?
[ step, number?, levelist?, param ]]
]
[ class=ms, expver, stream, date, time, country=de
[ domain, type, levtype, dbase, rki, rty, ty
[ step, levelist?, param ]]
]
[ class=ms, expver, stream, date, time, country=it
[ domain, type, levtype, model, bcmodel, icmodel:First3
[ step, levelist?, param ]
]
]
[ class=el, expver, stream, date, time, domain
[ origin, type, levtype
[ step, levelist?, param ]]
]
########################################################
# These are the rules matching most of the fields
# oper/dcda
[ class, expver, stream=oper/dcda/scda, date, time, domain?
[ type=im/sim
[ step?, ident, instrument, channel ]]
[ type=ssd
[ step, param, ident, instrument, channel ]]
[ type=4i, levtype
[ step, iteration, levelist, param ]]
[ type=me, levtype
[ step, number, levelist?, param ]]
[ type=ef, levtype
[ step, levelist?, param, channel? ]]
[ type=ofb/mfb
[ obsgroup, reportype ]]
[ type, levtype
[ step, levelist?, param ]]
]
# dcwv/scwv/wave
[ class, expver, stream=dcwv/scwv/wave, date, time, domain
[ type, levtype
[ step, param, frequency?, direction? ]]]
# enfo
[ class, expver, stream=enfo/efov, date, time, domain
[ type, levtype=dp, product?, section?
[ step, number?, levelist?, latitude?, longitude?, range?, param ]]
[ type=tu, levtype, reference
[ step, number, levelist?, param ]]
[ type, levtype
[ step, quantile?, number?, levelist?, param ]]
]
# waef/weov
[ class, expver, stream=waef/weov, date, time, domain
[ type, levtype
[ step, number?, param, frequency?, direction? ]]
]
########################################################
# enda
[ class, expver, stream=enda, date, time, domain
[ type=ef/em/es/ses, levtype
[ step, number?, levelist?, param, channel? ]]
[ type=ssd
[ step, number, param, ident, instrument, channel ]]
[ type, levtype
[ step, number?, levelist?, param ]]
]
# ewda
[ class, expver, stream=ewda, date, time, domain
[ type, levtype
[ step, number?, param, frequency?, direction? ]]
]
########################################################
# elda
[ class, expver, stream=elda, date, time, domain?
[ type=ofb/mfb
[ obsgroup, reportype ]]
[ type, levtype, anoffset
[ step, number?, levelist?, iteration?, param, channel? ]]
]
# ewda
[ class, expver, stream=ewla, date, time, domain
[ type, levtype, anoffset
[ step, number?, param, frequency?, direction? ]]
]
########################################################
# elda
[ class, expver, stream=lwda, date, time, domain?
[ type=ssd, anoffset
[ step, param, ident, instrument, channel ]]
[type=me, levtype, anoffset
[ number, step, levelist?, param]]
[ type=4i, levtype, anoffset
[ step, iteration, levelist, param ]]
[ type=ofb/mfb
[ obsgroup, reportype ]]
[ type, levtype, anoffset
[ step, levelist?, param]]
]
# ewda
[ class, expver, stream=lwwv, date, time, domain
[ type, levtype, anoffset
[ step, param, frequency?, direction? ]]
]
########################################################
# amap
[ class, expver, stream=amap, date, time, domain
[ type, levtype, origin
[ step, levelist?, param ]]]
# maed
[ class, expver, stream=maed, date, time, domain
[ type, levtype, origin
[ step, levelist?, param ]]]
# mawv
[ class, expver, stream=mawv, date, time, domain
[ type, levtype, origin
[ step, param, frequency?, direction? ]]]
# cher
[ class, expver, stream=cher, date, time, domain
[ type, levtype
[ step, levelist, param ]]]
# efhc
[ class, expver, stream=efhc, refdate, time, domain
[ type, levtype, date
[ step, number?, levelist?, param ]]]
# efho
[ class, expver, stream=efho, date, time, domain
[ type, levtype, hdate
[ step, number?, levelist?, param ]]]
# efhs
[ class, expver, stream=efhs, date, time, domain
[ type, levtype
[ step, quantile?, number?, levelist?, param ]]]
# wehs
[ class, expver, stream=wehs, date, time, domain
[ type, levtype
[ step, quantile?, number?, levelist?, param ]]]
# kwbc
[ class, expver, stream=kwbc, date, time, domain
[ type, levtype
[ step, number?, levelist?, param ]]]
# ehmm
[ class, expver, stream=ehmm, date, time, domain
[ type, levtype, hdate
[ fcmonth, levelist?, param ]]]
# ammc/cwao/edzw/egrr/lfpw/rjtd/toga
[ class, expver, stream=ammc/cwao/edzw/egrr/lfpw/rjtd/toga/fgge, date, time, domain
[ type, levtype
[ step, levelist?, param ]]]
########################################################################
# enfh
[ class, expver, stream=enfh, date, time, domain
[ type, levtype=dp, hdate, product?, section?
[ step, number?, levelist?, latitude?, longitude?, range?, param ]]
[ type, levtype, hdate
[ step, number?, levelist?, param ]]
]
# enwh
[ class, expver, stream=enwh, date, time, domain
[ type, levtype, hdate
[ step, number?, param, frequency?, direction? ]]
]
########################################################################
# sens
[ class, expver, stream=sens, date, time, domain
[ type, levtype
[ step, diagnostic, iteration, levelist?, param ]]]
########################################################################
# esmm
[ class, expver, stream=esmm, date, time, domain
[ type, levtype
[ fcmonth, levelist?, param ]]]
# ewhc
[ class, expver, stream=ewhc, refdate, time, domain
[ type, levtype, date
[ step, number?, param, frequency?, direction? ]]]
########################################################################
# ewho
[ class, expver, stream=ewho, date, time, domain
[ type, levtype, hdate
[ step, number?, param, frequency?, direction? ]]]
# mfam
[ class, expver, stream=mfam, date, time, domain
[ type=pb/pd, levtype, origin, system?, method
[ fcperiod, quantile, levelist?, param ]]
[ type, levtype, origin, system?, method
[ fcperiod, number?, levelist?, param ]]
]
# mfhm
[ class, expver, stream=mfhm, refdate, time, domain
[ type, levtype, origin, system?, method, date?
[ fcperiod, number?, levelist?, param ]]]
# mfhw
[ class, expver, stream=mfhw, refdate, time, domain
[ type, levtype, origin, system?, method, date
[ step, number?, param ]]]
# mfwm
[ class, expver, stream=mfwm, date, time, domain
[ type, levtype, origin, system?, method
[ fcperiod, number, param ]]]
# mhwm
[ class, expver, stream=mhwm, refdate, time, domain
[ type, levtype, origin, system?, method, date
[ fcperiod, number, param ]]]
# mmsf
[ class, expver, stream=mmsf, date, time, domain
[ type, levtype=dp, origin, product, section, system?, method
[ step, number, levelist?, latitude?, longitude?, range?, param ]]
[ type, levtype, origin, system?, method
[ step, number, levelist?, param ]]
]
# mnfc
[ class, expver, stream=mnfc, date, time, domain
[ type, levtype=dp, origin, product, section, system?, method
[ step, number?, levelist?, latitude?, longitude?, range?, param ]]
[ type, levtype, origin, system?, method
[ step, number?, levelist?, param ]]
]
# mnfh
[ class, expver, stream=mnfh, refdate, time, domain
[ type, levtype=dp, origin, product, section, system?, method, date
[ step, number?, levelist?, latitude?, longitude?, range?, param ]]
[ type, levtype, origin, system?, method, date?
[ step, number?, levelist?, param ]]
]
# mnfm
[ class, expver, stream=mnfm, date, time, domain
[ type, levtype, origin, system?, method
[ fcperiod, number?, levelist?, param ]]]
# mnfw
[ class, expver, stream=mnfw, date, time, domain
[ type, levtype, origin, system?, method
[ step, number?, param ]]]
# ea/mnth
[ class=ea, expver, stream=mnth, date, domain
[ type, levtype
[ time, step?, levelist?, param ]]]
# mnth
[ class, expver, stream=mnth, domain
[ type=cl, levtype
[ date: ClimateMonthly, time, levelist?, param ]]
[ type, levtype
[ date , time, step?, levelist?, param ]]]
# mofc
[ class, expver, stream=mofc, date, time, domain
[ type, levtype=dp, product, section, system?, method
[ step, number?, levelist?, latitude?, longitude?, range?, param ]]
[ type, levtype, system?, method
[ step, number?, levelist?, param ]]
]
# mofm
[ class, expver, stream=mofm, date, time, domain
[ type, levtype, system?, method
[ fcperiod, number, levelist?, param ]]]
# mmsa/msmm
[ class, expver, stream=mmsa, date, time, domain
[ type, levtype, origin, system?, method
[ fcmonth, number?, levelist?, param ]]]
[ class, expver, stream=msmm, date, time, domain
[ type, levtype, origin, system?, method
[ fcmonth, number?, levelist?, param ]]]
# ocea
[ class, expver, stream=ocea, date, time, domain
[ type, levtype, product, section, system?, method
[ step, number, levelist?, latitude?, longitude?, range?, param ]]
]
#=# seas
[ class, expver, stream=seas, date, time, domain
[ type, levtype=dp, product, section, system?, method
[ step, number, levelist?, latitude?, longitude?, range?, param ]]
[ type, levtype, system?, method
[ step, number, levelist?, param ]]
]
# sfmm/smma
[ class, expver, stream=sfmm/smma, date, time, domain
[ type, levtype, system?, method
[ fcmonth, number?, levelist?, param ]]]
# supd
[ class=od, expver, stream=supd, date, time, domain
[ type, levtype, origin?, grid
[ step, levelist?, param ]]]
# For era
[ class, expver, stream=supd, date, time, domain
[ type, levtype, grid- # The minus sign is here to consume 'grid', but don't index it
[ step, levelist?, param ]]]
# swmm
[ class, expver, stream=swmm, date, time, domain
[ type, levtype, system?, method
[ fcmonth, number, param ]]]
# wamf
[ class, expver, stream=wamf, date, time, domain
[ type, levtype, system?, method
[ step, number?, param ]]]
# ea/wamo
[ class=ea, expver, stream=wamo, date, domain
[ type, levtype
[ time, step?, param ]]]
# wamo
[ class, expver, stream=wamo, domain
[ type=cl, levtype
[ date: ClimateMonthly, time, param ]]
[ type, levtype
[ date, time, step?, param ]]]
# wamd
[ class, expver, stream=wamd, date, domain
[ type, levtype
[ param ]]]
# wasf
[ class, expver, stream=wasf, date, time, domain
[ type, levtype, system?, method
[ step, number, param ]]]
# wmfm
[ class, expver, stream=wmfm, date, time, domain
[ type, levtype, system?, method
[ fcperiod, number, param ]]]
# moda
[ class, expver, stream=moda, date, domain
[ type, levtype
[ levelist?, param ]]]
# msdc/mdfa/msda
[ class, expver, stream=msdc/mdfa/msda, domain
[ type, levtype
[ date, time?, step?, levelist?, param ]]]
# seap
[ class, expver, stream=seap, date, time, domain
[ type=sv/svar, levtype, origin, method?
[ step, leadtime, opttime, number, levelist?, param ]]
[ type=ef, levtype, origin
[ step, levelist?, param, channel? ]]
[ type, levtype, origin
[ step, levelist?, param ]]
]
[ class, expver, stream=mmaf, date, time, domain
[ type, levtype, origin, system?, method
[ step, number, levelist?, param ]]
]
[ class, expver, stream=mmam, date, time, domain
[ type, levtype, origin, system?, method
[ fcmonth, number, levelist?, param ]]
]
[ class, expver, stream=dacl, domain
[ type=pb, levtype
[ date: ClimateDaily, time, step, quantile, levelist?, param ]]
[ type, levtype
[ date: ClimateDaily, time, step, levelist?, param ]]
]
[ class, expver, stream=dacw, domain
[ type=pb, levtype
[ date: ClimateDaily, time, step, quantile, param ]]
[ type, levtype
[ date: ClimateDaily, time, step, param ]]
]
[ class, expver, stream=edmm/ewmm, date, time, domain
[ type=ssd
[ step, number, param, ident, instrument, channel ]]
[ type, levtype
[ step, number, levelist?, param ]]
]
[ class, expver, stream=edmo/ewmo, date, domain
[ type, levtype
[ number, levelist?, param ]]
]
# stream gfas
[ class=mc/rd, expver, stream=gfas, date, time, domain
[ type=ga, levtype
[ step, param ]]
[ type=gsd
[ param, ident, instrument ]]
]
# class is e2
[ class, expver, stream=espd, date, time, domain
[ type, levtype, origin, grid
[ step, number, levelist?, param ]]]
[ class=cs, expver, stream, date:Default, time, domain
[ type, levtype
[ step, levelist?, param ]]]

View File

@ -17,17 +17,15 @@ release = '0.1.0'
extensions = [
"sphinx.ext.autodoc", # for generating documentation from the docstrings in our code
"sphinx.ext.napoleon", # for parsing Numpy and Google stye docstrings
"myst_parser", # For parsing markdown
"myst_nb", # For parsing markdown
]
templates_path = ['_templates']
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', "jupyter_execute"]
source_suffix = {
".rst": "restructuredtext",
".txt": "markdown",
".md": "markdown",
}
# -- Options for HTML output -------------------------------------------------

View File

@ -1,3 +1,15 @@
---
jupytext:
text_representation:
extension: .md
format_name: myst
format_version: 0.13
jupytext_version: 1.16.4
kernelspec:
display_name: Python 3
language: python
name: python3
---
## Qubed
# Datacubes, Trees and Compressed trees
@ -7,7 +19,8 @@ This first part is essentially an abridged version of the [datacube spec](https:/
Qubed is primarily geared towards dealing with datafiles uniquely labeled by sets of key value pairs. We'll call a set of key value pairs that uniquely labels some data an `identifier`. Here's an example:
```python
{'class': 'd1',
{
'class': 'd1',
'dataset': 'climate-dt',
'generation': '1',
'date': '20241102',
@ -19,7 +32,8 @@ Qubed is primarily geared towards dealing with datafiles uniquely labeled by set
Unfortunately, we have more than one data file. If we are lucky, the set of identifiers that currently exist might form a dense datacube that we could represent like this:
```python
{'class': ['d1', 'd2'],
{
'class': ['d1', 'd2'],
'dataset': 'climate-dt',
'generation': ['1','2','3'],
'model': 'icon',
@ -29,53 +43,48 @@ Unfortunately, we have more than one data file. If we are lucky, the set of iden
}
```
with the property that any particular choice for a value for any key will correspond to a datafile that exists.
with the property that any particular choice for a value for any key will correspond to a datafile that exists. So this object represents `2x1x3x1x2x2x4 = 96` different datafiles.
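As a quick sanity check of that count (an illustrative snippet, not part of the documentation source), multiply the number of options for each key:
```python
import math
# one option count per key in the dense datacube above:
# class, dataset, generation, model, date, resolution, time
counts = [2, 1, 3, 1, 2, 2, 4]
print(math.prod(counts))  # 96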
To save space I will also represent this same thing like this:
```
- class=d1/d2, dataset=climate-dt, generation=1/2/3, model=icon, date=20241102/20241103, resolution=high/low, time=0000/0600/1200/1800
- class=d1/d2, dataset=climate-dt, generation=1/2/3, ..., time=0000/0600/1200/1800
```
Unfortunately, we are not lucky and our datacubes are not always dense. In this case we might instead represent which data exists using a tree:
```
root
├── class=od
│ ├── expver=0001
│ │ ├── param=1
│ │ └── param=2
│ └── expver=0002
│ ├── param=1
│ └── param=2
└── class=rd
├── expver=0001
│ ├── param=1
│ ├── param=2
│ └── param=3
└── expver=0002
├── param=1
└── param=2
```{code-cell} python3
from qubed import Qube
q = Qube.from_dict({
"class=od" : {
"expver=0001": {"param=1":{}, "param=2":{}},
"expver=0002": {"param=1":{}, "param=2":{}},
},
"class=rd" : {
"expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
"expver=0002": {"param=1":{}, "param=2":{}},
},
})
q.print()
```
But it's clear that the above tree contains a lot of redundant information; many of the subtrees are identical, for example. Indeed, in practice a lot of our data turns out to be 'nearly dense', in that it contains many dense datacubes within it.
There are many valid ways one could compress this tree. If we add the restriction that no identical key=value pairs can be adjacent, then here is the compressed tree we might get:
```
root
├── class=rd
│ ├── expver=0001, param=1/2/3
│ └── expver=0002, param=1/2
└── class=od, expver=0001/0002, param=1/2
```
```{code-cell} python3
q.compress().print()
```
Without the above restriction we could instead have:
Without the above restriction we could, for example, have:
```
root
├── class=rd
│ ├── expver=0001, param=3
│ └── expver=0001/0002, param=1/2
└── class=od, expver=0001/0002, param=1/2
├── class=od, expver=0001/0002, param=1/2
└── class=rd
├── expver=0001, param=3
└── expver=0001/0002, param=1/2
```
but we do not allow this, because it would mean having to take multiple branches in order to find data with `expver=0001`.
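To illustrate the benefit, here is a sketch using the `select` method added elsewhere in this commit (the exact output is indicative only):
```python
# With the adjacency restriction, all data with expver=0001 sits on a single
# branch under each class, so a lookup never has to merge sibling branches.
q.compress().select({"expver": "0001"}).print()
```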

File diff suppressed because one or more lines are too long

View File

@ -3,8 +3,8 @@ requires = ["maturin>=1.7,<2.0"]
build-backend = "maturin"
[project]
name = "tree_traverser"
description = "Tools to work with compressed Datacubes and Trees"
name = "qubed"
description = "A library that provides a tree of datacubes called Qube."
readme = "README.md"
authors = [
{name = "Tom Hodson", email = "thomas.hodson@ecmwf.int"},
@ -21,17 +21,16 @@ dependencies = [
]
[tool.maturin]
python-source = "python_src"
module-name = "tree_traverser.rust"
python-source = "src/python"
module-name = "qubed.rust"
features = ["pyo3/extension-module"]
[project.optional-dependencies]
docs = [
"sphinx",
"sphinx-rtd-theme",
"myst-parser",
"myst_nb",
"sphinx-autobuild"
]

View File

@ -4,7 +4,7 @@ from dataclasses import dataclass, field
from frozendict import frozendict
from .DataCubeTree import Enum, NodeData, Tree
from .Qube import Enum, NodeData, Tree
from .tree_formatters import HTML, node_tree_to_html, node_tree_to_string
NodeId = int

View File

@ -1,5 +1,7 @@
import dataclasses
from collections import defaultdict
from dataclasses import dataclass, field
from functools import cached_property
from typing import Any, Callable, Hashable, Literal, Mapping
from frozendict import frozendict
@ -31,9 +33,9 @@ class NodeData:
return f"{self.key}={self.values.summary()}" if self.key != "root" else "root"
@dataclass(frozen=True, eq=True, order=True)
class Tree:
class Qube:
data: NodeData
children: tuple['Tree', ...]
children: tuple['Qube', ...]
@property
def key(self) -> str:
@ -52,7 +54,7 @@ class Tree:
return self.data.summary()
@classmethod
def make(cls, key : str, values : Values, children, **kwargs) -> 'Tree':
def make(cls, key : str, values : Values, children, **kwargs) -> 'Qube':
return cls(
data = NodeData(key, values, metadata = kwargs.get("metadata", frozendict())
),
@ -61,9 +63,9 @@ class Tree:
@classmethod
def from_json(cls, json: dict) -> 'Tree':
def from_json(json: dict) -> Tree:
return Tree.make(
def from_json(cls, json: dict) -> 'Qube':
def from_json(json: dict) -> Qube:
return Qube.make(
key=json["key"],
values=values_from_json(json["values"]),
metadata=json["metadata"] if "metadata" in json else {},
@ -72,20 +74,20 @@ class Tree:
return from_json(json)
@classmethod
def from_dict(cls, d: dict) -> 'Tree':
def from_dict(d: dict) -> tuple[Tree, ...]:
return tuple(Tree.make(
def from_dict(cls, d: dict) -> 'Qube':
def from_dict(d: dict) -> tuple[Qube, ...]:
return tuple(Qube.make(
key=k.split("=")[0],
values=Enum(tuple(k.split("=")[1].split("/"))),
children=from_dict(children)
) for k, children in d.items())
return Tree.make(key = "root",
return Qube.make(key = "root",
values=Enum(("root",)),
children = from_dict(d))
@classmethod
def empty(cls) -> 'Tree':
def empty(cls) -> 'Qube':
return cls.make("root", Enum(("root",)), [])
@ -101,7 +103,7 @@ class Tree:
return node_tree_to_html(self, depth = 2, collapse = True)
def __getitem__(self, args) -> 'Tree':
def __getitem__(self, args) -> 'Qube':
key, value = args
for c in self.children:
if c.key == key and value in c.values:
@ -111,16 +113,16 @@ class Tree:
def transform(self, func: 'Callable[[Tree], Tree | list[Tree]]') -> 'Tree':
def transform(self, func: 'Callable[[Qube], Qube | list[Qube]]') -> 'Qube':
"""
Call a function on every node of the tree, return one or more nodes.
Call a function on every node of the Qube, return one or more nodes.
If multiple nodes are returned they each get a copy of the (transformed) children of the original node.
Any changes to the children of a node will be ignored.
"""
def transform(node: Tree) -> list[Tree]:
def transform(node: Qube) -> list[Qube]:
children = [cc for c in node.children for cc in transform(c)]
new_nodes = func(node)
if isinstance(new_nodes, Tree):
if isinstance(new_nodes, Qube):
new_nodes = [new_nodes]
return [dataclasses.replace(new_node, children = children)
@ -129,35 +131,14 @@ class Tree:
children = tuple(cc for c in self.children for cc in transform(c))
return dataclasses.replace(self, children = children)
def guess_datatypes(self) -> 'Tree':
def guess_datatypes(node: Tree) -> list[Tree]:
# Try to convert enum values into more structured types
children = tuple(cc for c in node.children for cc in guess_datatypes(c))
if isinstance(node.values, Enum):
match node.key:
case "time": range_class = TimeRange
case "date": range_class = DateRange
case _: range_class = None
if range_class is not None:
return [
dataclasses.replace(node, values = range, children = children)
for range in range_class.from_strings(node.values.values)
]
return [dataclasses.replace(node, children = children)]
children = tuple(cc for c in self.children for cc in guess_datatypes(c))
return dataclasses.replace(self, children = children)
def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed") -> 'Tree':
def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed") -> 'Qube':
# make all values lists
selection = {k : v if isinstance(v, list) else [v] for k,v in selection.items()}
def not_none(xs): return tuple(x for x in xs if x is not None)
def select(node: Tree) -> Tree | None:
def select(node: Qube) -> Qube | None:
# Check if the key is specified in the selection
if node.key not in selection:
if mode == "strict":
@ -176,10 +157,10 @@ class Tree:
@staticmethod
def _insert(position: "Tree", identifier : list[tuple[str, list[str]]]):
def _insert(position: "Qube", identifier : list[tuple[str, list[str]]]):
"""
This algorithm goes as follows:
We're at a particular node in the tree, and we have a list of key-values pairs that we want to insert.
We're at a particular node in the Qube, and we have a list of key-values pairs that we want to insert.
We take the first key values pair
key, values = identifier.pop(0)
@ -226,12 +207,12 @@ class Tree:
# values = values - values_set # At the end of this loop values will contain only the new values
# if group_1:
# group_1_node = Tree.make(c.key, Enum(tuple(group_1)), c.children)
# group_1_node = Qube.make(c.key, Enum(tuple(group_1)), c.children)
# new_children.append(group_1_node) # Add the unaffected part of this child
# if group_2:
# new_node = Tree.make(key, Enum(tuple(affected)), [])
# new_node = Tree._insert(new_node, identifier)
# new_node = Qube.make(key, Enum(tuple(affected)), [])
# new_node = Qube._insert(new_node, identifier)
# new_children.append(new_node) # Add the affected part of this child
@ -243,21 +224,71 @@ class Tree:
# # If there are any values not in any of the existing children, add them as a new child
# if entirely_new_values:
# new_node = Tree.make(key, Enum(tuple(entirely_new_values)), [])
# new_children.append(Tree._insert(new_node, identifier))
# new_node = Qube.make(key, Enum(tuple(entirely_new_values)), [])
# new_children.append(Qube._insert(new_node, identifier))
return Tree.make(position.key, position.values, new_children)
return Qube.make(position.key, position.values, new_children)
def insert(self, identifier : dict[str, list[str]]) -> 'Tree':
def insert(self, identifier : dict[str, list[str]]) -> 'Qube':
insertion = [(k, v) for k, v in identifier.items()]
return Tree._insert(self, insertion)
return Qube._insert(self, insertion)
def to_list_of_cubes(self):
def to_list_of_cubes(node: Tree) -> list[list[Tree]]:
def to_list_of_cubes(node: Qube) -> list[list[Qube]]:
return [[node] + sub_cube for c in node.children for sub_cube in to_list_of_cubes(c)]
return to_list_of_cubes(self)
def info(self):
cubes = self.to_list_of_cubes()
print(f"Number of distinct paths: {len(cubes)}")
print(f"Number of distinct paths: {len(cubes)}")
@cached_property
def structural_hash(self) -> int:
"""
This hash takes into account the key, values and children's key values recursively.
Because nodes are immutable, we only need to compute this once.
"""
def hash_node(node: Qube) -> int:
return hash((node.key, node.values, tuple(c.structural_hash for c in node.children)))
return hash_node(self)
def compress(self) -> "Qube":
# First compress the children
new_children = [child.compress() for child in self.children]
# Now take the set of new children and see if any have identical key, metadata and children
# the values may differ and will be collapsed into a single node
identical_children = defaultdict(set)
for child in new_children:
# only care about the key and children of each node, ignore values
key = hash((child.key, tuple((cc.structural_hash for cc in child.children))))
identical_children[key].add(child)
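# identical_children now maps a hash of (key, children structure) to the set of sibling
# nodes sharing them; nodes grouped together differ only in their values (and metadata)
# and are merged into a single node below.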
# Now go through and create new compressed nodes for any groups that need collapsing
new_children = []
for child_set in identical_children.values():
if len(child_set) > 1:
child_set = list(child_set)
key = child_set[0].key
# Compress the children into a single node
assert all(isinstance(child.data.values, Enum) for child in child_set), "All children must have Enum values"
node_data = NodeData(
key = key,
metadata = frozendict(), # Todo: Implement metadata compression
values = Enum(tuple(v for child in child_set for v in child.data.values.values)),
)
new_child = Qube(data = node_data, children = child_set[0].children)
else:
# If the group is size one just keep it
new_child = child_set.pop()
new_children.append(new_child)
return Qube(
data = self.data,
children = tuple(sorted(new_children))
)

View File

@ -0,0 +1,2 @@
from . import rust as backend
from .Qube import Qube

Binary file not shown.

View File

@ -1,8 +1,8 @@
import json
import os
from collections import defaultdict
from typing import Any, Dict
from pathlib import Path
from typing import Any, Dict
import redis
import yaml
@ -108,6 +108,12 @@ def get_leaves(tree):
for leaf in get_leaves(v):
yield leaf
@app.get("/api/tree")
async def get_tree(request: Request):
request_dict = request_to_dict(request)
print(c_tree.multi_match(request_dict))
return c_tree.multi_match(request_dict)
@app.get("/api/match")
async def get_match(request: Request):
# Convert query parameters to dictionary format

View File

@ -1,3 +1,3 @@
parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
cd "$parent_path"
CONFIG_DIR=../config/local fastapi dev ./main.py --port 8124 --reload
LOCAL_CACHE=../config/climate-dt fastapi dev ./main.py --port 8124 --reload

View File

@ -1,72 +0,0 @@
/target
# Byte-compiled / optimized / DLL files
__pycache__/
.pytest_cache/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
.venv/
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
include/
man/
venv/
*.egg-info/
.installed.cfg
*.egg
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
pip-selfcheck.json
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Rope
.ropeproject
# Django stuff:
*.log
*.pot
.DS_Store
# Sphinx documentation
docs/_build/
# PyCharm
.idea/
# VSCode
.vscode/
# Pyenv
.python-version

View File

@ -1,21 +0,0 @@
```
pip install maturin
maturin develop
```
To values.yaml, add config for the periodic update job:
- How often to run the update job
- What request stub to use:
  dataset: climate-dt
  date: -2/-1
  etc...
- What order to put the keys in, in the tree:
  key_order:
  - activity
  - class
  - dataset
  - date

View File

@ -1,2 +0,0 @@
from . import rust as backend
from .CompressedTree import CompressedTree, RefcountedDict