Compare commits

170 commits:

165bf5aca2 aaafa28dfb 3328a0375b ba2c67d812 04b4ee24eb 7069b70dd4 90ea736c43 959dac332d 97c5abc38b 1188733034
35bb8f0edd ed4a9055fa 110046b251 a85b700084 271d06c65a 7c28c7023b 4924fdb804 d246dae54d 07f9a24daa b13a06a0cc
87c57ec2cc a957d26da7 80b0408722 79e9f83c8c fff00ca6f1 fa646aee77 e04c0dd3bc 4e777f295d 1259ff08b6 7b36a76154
10106ba6d8 2fa99d775c b5c2681f63 dfc61caa38 a502cb6ab2 ca944521f9 6ec4b044b8 251bec14fc c3556ce6fa bf47401e6c
11014b07ea b6a27fdadf 70b1fd65e5 2e36db4268 6039a3a494 7ef930bc1c 79983f85a1 ab2f8cf3f3 4502a942cb 3017185950
cd26905261 0d3c8248b0 b2aba5dd42 79b97fd813 275831d186 57877e1e0c cf9db41dc4 8f1735c076 39f348244d 2884f9fff8
df5360f29a d2f3165fe8 6b98f7b7a9 9beaaa2e10 06c84fb20e c31467fb04 6648502bf4 e14b9ee12f 48444cc3ce 8306fb4c3e
68ad80e435 162dd48748 ef844c9b57 1f7c5dfecd a23f366969 ecccf336b4 4c941d34f8 a832e44e03 11516a05ba 8f5b202621
4a16d16748 3de40e46ef 8a2c5b341d 819c29d768 ee546cd788 9873241eab e432040321 2d0c301062 52a82447f9 a70bd9f0cd
1ca23ca4cf bb61e6fe7c 73dd9a16a8 967adb1a69 4bcb09180e ea07545dc0 9d4fcbe624 fe00bb1c7f af69d2fe00 62c7a49c59
adeccec4e5 dca1e628df 8fc87955d4 ee1a5aa61b 0abbdfab15 2392531ec7 00ea804c35 fbf8a0fcaf 1ab51646b9 516a4abb85
515e373c18 ee1bee2a01 37298b7096 dc52408e35 db3c18b3c8 6277920ac4 c537028bb7 be5a81c400 084ad96569 7bafcda627
847bd0ab12 bcd2d8feae 9928aacee2 b6c1f76698 e703b5f308 1dc9177a91 609e3e9f74 81a478a58f f51f5dcb42 fcdf4e0d51
3dba4eaa5e 257380c46d f6088e9583 deba35f71a ca6b1fa8f9 07b2c37aed 9c4af79640 35b54c9f7e 01729a323a b679402a1b
adb9923ffe 04a0cd8ab2 523773d467 f0fc7ad514 cade421402 c1a4f7eb3b b393e9c12a a998f44b93 55249d119a 08b5d10a26
25ff51c71e e07c06b1ee 9ab2f4dbcf df8ea6c2f9 50d86c77ec e00a5ff919 4a7bcc0765 5e2e13f200 2f077bd522 76ec160d6a
9e18a594af 1d454229cf 214395f617 243d2df00c e7f49e5898 5765da7ecc 17e7113a8c 808eca4e15 79345d2c0d 2f65507107
`.github/workflows/build_wheels.yml` (vendored, new file, 188 lines)

```yaml
# This file is autogenerated by maturin v1.7.7
# To update, run
#
#    maturin generate-ci github
#
name: Build Python Wheels and push to PyPI

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:
  linux:
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
        platform:
          - runner: ubuntu-22.04
            target: x86_64
          - runner: ubuntu-22.04
            target: x86
          - runner: ubuntu-22.04
            target: aarch64
          - runner: ubuntu-22.04
            target: armv7
          - runner: ubuntu-22.04
            target: s390x
          - runner: ubuntu-22.04
            target: ppc64le
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
      - name: Set cargo version from tag
        run: python .github/workflows/update_version.py

      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.platform.target }}
          args: --release --out dist --find-interpreter
          sccache: 'true'
          manylinux: auto
      - name: Upload wheels
        uses: actions/upload-artifact@v4
        with:
          name: wheels-linux-${{ matrix.platform.target }}
          path: dist

  musllinux:
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
        platform:
          - runner: ubuntu-22.04
            target: x86_64
          - runner: ubuntu-22.04
            target: x86
          - runner: ubuntu-22.04
            target: aarch64
          - runner: ubuntu-22.04
            target: armv7
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
      - name: Set cargo version from tag
        run: python .github/workflows/update_version.py

      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.platform.target }}
          args: --release --out dist --find-interpreter
          sccache: 'true'
          manylinux: musllinux_1_2
      - name: Upload wheels
        uses: actions/upload-artifact@v4
        with:
          name: wheels-musllinux-${{ matrix.platform.target }}
          path: dist

  windows:
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
        platform:
          - runner: windows-latest
            target: x64
          - runner: windows-latest
            target: x86
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
          architecture: ${{ matrix.platform.target }}
      - name: Set cargo version from tag
        run: python .github/workflows/update_version.py

      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.platform.target }}
          args: --release --out dist --find-interpreter
          sccache: 'true'
      - name: Upload wheels
        uses: actions/upload-artifact@v4
        with:
          name: wheels-windows-${{ matrix.platform.target }}
          path: dist

  macos:
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
        platform:
          - runner: macos-13
            target: x86_64
          - runner: macos-14
            target: aarch64
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
      - name: Set cargo version from tag
        run: python .github/workflows/update_version.py
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.platform.target }}
          args: --release --out dist --find-interpreter
          sccache: 'true'
      - name: Upload wheels
        uses: actions/upload-artifact@v4
        with:
          name: wheels-macos-${{ matrix.platform.target }}
          path: dist

  sdist:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set cargo version from tag
        run: python .github/workflows/update_version.py
      - name: Build sdist
        uses: PyO3/maturin-action@v1
        with:
          command: sdist
          args: --out dist
      - name: Upload sdist
        uses: actions/upload-artifact@v4
        with:
          name: wheels-sdist
          path: dist

  release:
    name: Release
    runs-on: ubuntu-latest
    if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
    needs: [linux, musllinux, windows, macos, sdist]
    permissions:
      # Use to sign the release artifacts
      id-token: write
      # Used to upload release artifacts
      contents: write
      # Used to generate artifact attestation
      attestations: write
    steps:
      - uses: actions/download-artifact@v4
      - name: Generate artifact attestation
        uses: actions/attest-build-provenance@v1
        with:
          subject-path: 'wheels-*/*'
      - name: Publish to PyPI
        if: ${{ startsWith(github.ref, 'refs/tags/') }}
        uses: PyO3/maturin-action@v1
        env:
          MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        with:
          command: upload
          args: --non-interactive --skip-existing wheels-*/*
```
`.github/workflows/test.yml` (vendored, new file, 34 lines)

```yaml
# This file is autogenerated by maturin v1.7.7
# To update, run
#
#    maturin generate-ci github
#
name: Test

on:
  push:
    branches:
      - main
      - develop
  pull_request:
  workflow_dispatch:

permissions:
  contents: read

jobs:
  linux:
    runs-on:
      - ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
      - name: Build and Install
        run: |
          python -m pip install ".[dev]"
      - name: Test
        run: |
          pytest
```
`.github/workflows/update_version.py` (vendored, new executable file, 42 lines)

```python
import re
import subprocess
from pathlib import Path

CARGO_TOML_PATH = Path("Cargo.toml")


# Get the latest Git tag and strip the leading 'v' if present
def get_git_version():
    try:
        version = subprocess.check_output(
            ["git", "describe", "--tags", "--always"], text=True
        ).strip()
        version = re.sub(r"^v", "", version)  # Remove leading 'v'
        return version
    except subprocess.CalledProcessError:
        raise RuntimeError(
            "Failed to get Git tag. Make sure you have at least one tag in the repository."
        )


# Update version in Cargo.toml
def update_cargo_version(new_version):
    cargo_toml = CARGO_TOML_PATH.read_text()

    # Replace version in [package] section
    updated_toml = re.sub(
        r'^version = "[^"]+"',
        f'version = "{new_version}"',
        cargo_toml,
        flags=re.MULTILINE,
    )

    CARGO_TOML_PATH.write_text(updated_toml)


if __name__ == "__main__":
    version = get_git_version()
    print(f"Parsed version: {version}")

    update_cargo_version(version)
    print(f"Updated Cargo.toml with version: {version}")
```
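To make the substitution above concrete, here is a small self-contained illustration of the same `re.sub` call on an in-memory string (the input text and version number are invented for the example):

```python
import re

# Invented example input; only the [package] version line matches the anchored pattern.
cargo_toml = 'version = "0.0.0"\nserde = { version = "1.0", features = ["derive"] }\n'

updated = re.sub(
    r'^version = "[^"]+"',   # ^ plus re.MULTILINE: only lines starting with version =
    'version = "1.2.3"',     # hypothetical tag-derived version
    cargo_toml,
    flags=re.MULTILINE,
)
print(updated)
# version = "1.2.3"
# serde = { version = "1.0", features = ["derive"] }
```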
`.gitignore` (vendored, modified: 2 lines to 23 lines)

```
__pycache__
.DS_Store
config.yaml
.venv
*.json
raw_list
*.egg-info/
deps/
docs/_build/
docs/jupyter_execute
target/
*.so
_build/
build/
.ipynb_checkpoints/
dist/
Cargo.lock
src/python/qubed/_version.py
*.ipynb
cmake_build/
tests/data/
*.secret
node_modules/
```
`.pre-commit-config.yaml` (new file, 17 lines)

```yaml
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      # - id: check-yaml
      # - id: check-added-large-files

  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.9.7
    hooks:
      - id: ruff
        args: [ --fix ]
      - id: ruff-format
```
`.readthedocs.yaml` (new file, 29 lines)

```yaml
# .readthedocs.yaml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Set the version of Python and other tools you might need
build:
  os: ubuntu-20.04
  tools:
    python: "3.12"
    rust: latest

# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/conf.py

# If using Sphinx, optionally build your docs in additional formats such as PDF
# formats:
#    - pdf

python:
  install:
    - requirements: docs/requirements.txt
    - method: pip
      path: .
      extra_requirements:
        - docs
```
`Cargo.toml` (new file, 27 lines)

```toml
[package]
name = "qubed"
version = "0.0.0"
edition = "2021"
repository = "https://github.com/ecmwf/qubed"

[dependencies]
# rsfdb = {git = "https://github.com/ecmwf/rsfdb", branch = "develop"}
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
pyo3 = "0.25"
lasso = "0.7.3"
itertools = "0.14.0"

[package.metadata.maturin]
version-from-git = true

[lib]
name = "tree_traverser"
crate-type = ["cdylib"]
path = "./src/rust/lib.rs"

# [patch.'https://github.com/ecmwf/rsfdb']
# rsfdb = { path = "../rsfdb" }

# [patch.'https://github.com/ecmwf-projects/rsfindlibs']
# rsfindlibs = { path = "../rsfindlibs" }
```
`LICENSE.txt` (new file, 201 lines)

The full, unmodified text of the Apache License, Version 2.0 (January 2004, http://www.apache.org/licenses/), with the appendix copyright notice reading "Copyright 2020 European Centre for Medium-Range Weather Forecasts".
`README.md` (modified: the previous single line `# Catalog Server` is replaced by the 79 lines below)

# <p align="center"><img src="https://raw.githubusercontent.com/ecmwf/qubed/refs/heads/main/docs/_static/banner.svg" width="1000"></p>

[](https://github.com/ecmwf/codex/raw/refs/heads/main/Project%20Maturity#emerging)
[](https://qubed.readthedocs.io/en/latest/)
[](https://pypi.org/project/qubed/)
[](https://pypi.org/project/qubed/)

Qubed provides a datastructure primitive for working with trees of DataCubes. If a normal tree looks like this:
```
root
├── class=od
│   ├── expver=0001
│   │   ├── param=1
│   │   └── param=2
│   └── expver=0002
│       ├── param=1
│       └── param=2
└── class=rd
    ├── expver=0001
    │   ├── param=1
    │   ├── param=2
    │   └── param=3
    └── expver=0002
        ├── param=1
        └── param=2
```

A compressed view of the same set would be:
```
root
├── class=od, expver=0001/0002, param=1/2
└── class=rd
    ├── expver=0001, param=1/2/3
    └── expver=0002, param=1/2
```

Qubed provides all the algorithms on this data structure you would expect, such as intersection/union/difference, compression, search, filtering, etc.

In addition to this core datastructure, this repository contains a collection of components designed to deliver user-friendly cataloging for datacube data. The STAC Server, Frontend and a periodic job to do tree compression can be deployed together to Kubernetes using the [helm chart](./helm_chart). This deployment can then be accessed either via the Query Builder web interface or the Python client.

## 📦 Components Overview

### 🚀 [Qubed STAC Server](./stac_server)
> **FastAPI STAC Server Backend**

- 🌟 Implements our proposed [Datacube STAC Extension](./structured_stac.md).
- 🛠️ Allows efficient traversal of ECMWF's datacubes.
- Part of the implementation of this is the [🌲 Tree Compressor](./tree_compresser), a **compressed tree representation** optimised for storing trees with many duplicated subtrees.
- 🔗 **[Live Example](https://qubed.lumi.apps.dte.destination-earth.eu/api/v1/stac/climate-dt/?class=od%2Cd1&dataset=climate-dt)**.

---

### 🌐 [Qubed Web Query Builder](./web_query_builder)
> **Web Frontend**

- 👀 Displays data from the **STAC Server** in an intuitive user interface.
- 🌍 **[Try the Live Demo](https://qubed.lumi.apps.dte.destination-earth.eu/)**.

---

### TODO: 🐍 [Qubed Python Query Builder](./python_query_builder)
> **Python Client**

- 🤖 A Python client for the **STAC Server**.
- 📘 Reference implementation of the [Datacube STAC Extension](./structured_stac.md).

---

## 🚀 Deployment Instructions

Deploy all components to **Kubernetes** using the provided [Helm Chart](./helm_chart).

---

### 🛠️ Future Enhancements
- Integration of the **Query Builder Web** with Polytope to construct a full Polytope query.
- A JS Polytope client implementation to allow performing the Polytope query and getting the result entirely in the browser.

---
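A minimal usage sketch of the core data structure described above; the `Qube.from_dict` constructor and its `key=value1/value2` dictionary syntax are assumptions about the qubed Python API rather than something shown in this diff:

```python
# Assumed API: Qube.from_dict and the "key=value1/value2" key syntax are
# illustrative guesses at the qubed interface, not taken from this diff.
from qubed import Qube

q = Qube.from_dict(
    {
        "class=od": {"expver=0001/0002": {"param=1/2": {}}},
        "class=rd": {
            "expver=0001": {"param=1/2/3": {}},
            "expver=0002": {"param=1/2": {}},
        },
    }
)
print(q)  # expected to print a compressed tree like the second example above
```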
`ROADMAP.md` (new file, 50 lines)

Initial Python Implementation
- [x] Basic Qube datastructure
- [x] Compression
- [x] Set Operations (Union, Difference, Intersection...)
- [x] Query with request
- [x] Iteration over leaves
- [x] Iteration over datacubes
- [x] Command line creation from fdb list --compact
- [ ] Set up periodic updates to climate-dt/extremes-dt again
- [ ] Maybe also do production db?
- [ ] Do mars list to constraints conversion
- [ ] protobuf serialization

Rust port
- [ ] Initial object
- [ ] Sort out ownership issues (one arena owned by the Python object)
- [ ] Compression
- [ ] Set Operations
- [ ] Query with request
- [ ] Iteration over leaves
- [ ] Iteration over datacubes
- [ ] Set up periodic updates to climate-dt/extremes-dt again

## API

Qubed will provide a core compressed tree data structure called a Qube with:

Methods to convert to and from:
- [x] A human readable representation like those seen above.
- [x] An HTML version where subtrees can be collapsed.
- [ ] A compact protobuf-based binary format
- [x] Nested python dictionaries or JSON
- [/] The output of [fdb list](https://confluence.ecmwf.int/display/FDB/fdb-list)
- [ ] [mars list][mars list]
- [ ] [constraints.json][constraints]

[constraints]: https://object-store.os-api.cci2.ecmwf.int/cci2-prod-catalogue/resources/reanalysis-era5-land/constraints_a0ae5b42d67869674e13fba9fd055640bcffc37c24578be1f465d7d5ab2c7ee5.json
[mars list]: https://git.ecmwf.int/projects/CDS/repos/cads-forms-reanalysis/browse/reanalysis-era5-single-levels/gecko-config/mars.list?at=refs%2Fheads%2Fprod

Useful algorithms:
- [x] Compression
- [/] Union/Intersection/Difference

Performant Membership Queries
- Identifier membership
- Datacube query (selection)

Metadata Storage
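As a rough sketch of the set operations and conversions listed above (the `|` union operator and `to_json` method are assumed API names, not confirmed by this diff):

```python
# Hypothetical sketch only: union via "|" and to_json() are assumed method names.
from qubed import Qube

a = Qube.from_dict({"class=od": {"expver=0001": {"param=1/2": {}}}})
b = Qube.from_dict({"class=od": {"expver=0002": {"param=1/2": {}}}})

union = a | b            # would compress to class=od, expver=0001/0002, param=1/2
print(union.to_json())   # round-trip through the JSON representation listed above
```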
Deleted file (7 lines):

# stac-catalog

```
# Make and activate a python environment
pip install -r requirements.txt
./run.sh
```
Deleted file (9 lines):

```dockerfile
FROM python:3.12-slim

WORKDIR /app

COPY . .

RUN pip install -r requirements.txt

CMD ["fastapi", "run", "main.py"]
```
Deleted file (1 line):

```python
from .fdb_schema_parser import FDBSchema, FDBSchemaFile, KeySpec, Key
```
Deleted file (371 lines):

```python
import dataclasses
import json
from dataclasses import dataclass, field
from typing import Any

import pe
from pe.actions import Pack
from pe.operators import Class, Star

from .fdb_types import FDB_type_to_implementation, FDBType


@dataclass(frozen=True)
class KeySpec:
    """
    Represents the specification of a single key in an FDB schema file. For example in
    ```
    [ class, expver, stream=lwda, date, time, domain?
        [ type=ofb/mfb/oai
            [ obsgroup, reportype ]]]
    ```
    class, expver, type=ofb/mfb/oai etc are the KeySpecs

    These can have additional information such as: flags like `domain?`, allowed values like `type=ofb/mfb/oai`
    or specify type information with `date: ClimateMonthly`
    """

    key: str
    type: FDBType = field(default_factory=FDBType)
    flag: str | None = None
    values: tuple = field(default_factory=tuple)
    comment: str = ""

    def __repr__(self):
        repr = self.key
        if self.flag:
            repr += self.flag
        # if self.type:
        #     repr += f":{self.type}"
        if self.values:
            repr += "=" + "/".join(self.values)
        return repr

    def matches(self, key, value):
        # Sanity check!
        if self.key != key:
            return False

        # Some keys have a set of allowed values type=ofb/mfb/oai
        if self.values:
            if value not in self.values:
                return False

        # Check the formatting of values like Time or Date
        if self.type and not self.type.validate(value):
            return False

        return True

    def is_optional(self):
        if self.flag is None:
            return False
        return "?" in self.flag

    def is_allable(self):
        if self.flag is None:
            return False
        return "*" in self.flag


@dataclass(frozen=True)
class Comment:
    "Represents a comment node in the schema"

    value: str


@dataclass(frozen=True)
class FDBSchemaTypeDef:
    "Mapping between FDB schema key names and FDB Schema Types, i.e expver is of type Expver"

    key: str
    type: str


# This is the schema grammar written in PEG format
fdb_schema = pe.compile(
    r"""
    FDB < Line+ EOF
    Line < Schema / Comment / TypeDef / empty

    # Comments
    Comment <- "#" ~non_eol*
    non_eol <- [\x09\x20-\x7F] / non_ascii
    non_ascii <- [\x80-\uD7FF\uE000-\U0010FFFF]

    # Default Type Definitions
    TypeDef < String ":" String ";"

    # Schemas are the main attraction
    # They're a tree of KeySpecs.
    Schema < "[" KeySpecs (","? Schema)* "]"

    # KeySpecs can be just a name i.e expver
    # Can also have a type expver:int
    # Or a flag expver?
    # Or values expver=xxx
    KeySpecs < KeySpec_ws ("," KeySpec_ws)*
    KeySpec_ws < KeySpec
    KeySpec <- key:String (flag:Flag)? (type:Type)? (values:Values)? ([ ]* comment:Comment)?
    Flag <- ~("?" / "-" / "*")
    Type <- ":" [ ]* String
    Values <- "=" String ("/" String)*

    # Low level stuff
    String <- ~([a-zA-Z0-9_]+)
    EOF <- !.
    empty <- ""
    """,
    actions={
        "Schema": Pack(tuple),
        "KeySpec": KeySpec,
        "Values": Pack(tuple),
        "Comment": Comment,
        "TypeDef": FDBSchemaTypeDef,
    },
    ignore=Star(Class("\t\f\r\n ")),
    # flags=pe.DEBUG,
)


def post_process(entries):
    "Take the raw output from the PEG parser and split it into type definitions and schema entries."
    typedefs = {}
    schemas = []
    for entry in entries:
        match entry:
            case c if isinstance(c, Comment):
                pass
            case t if isinstance(t, FDBSchemaTypeDef):
                typedefs[t.key] = t.type
            case s if isinstance(s, tuple):
                schemas.append(s)
            case _:
                raise ValueError
    return typedefs, tuple(schemas)


def determine_types(types, node):
    "Recursively walk a schema tree and insert the type information."
    if isinstance(node, tuple):
        return [determine_types(types, n) for n in node]
    return dataclasses.replace(node, type=types.get(node.key, FDBType()))


@dataclass
class Key:
    key: str
    value: Any
    key_spec: KeySpec
    reason: str

    def __bool__(self):
        return self.reason in {"Matches", "Skipped", "Select All"}

    def emoji(self):
        return {"Matches": "✅", "Skipped": "⏭️", "Select All": "★"}.get(
            self.reason, "❌"
        )

    def info(self):
        return f"{self.emoji()} {self.key:<12}= {str(self.value):<12} ({self.key_spec}) {self.reason if not self else ''}"

    def __repr__(self):
        return f"{self.key}={self.key_spec.type.format(self.value)}"

    def as_json(self):
        return dict(
            key=self.key,
            value=self.as_string(),
            reason=self.reason,
        )


class FDBSchema:
    """
    Represents a parsed FDB Schema file.
    Has methods to validate and convert request dictionaries to a mars request form with validation and type information.
    """

    def __init__(self, string, defaults: dict[str, str] = {}):
        """
        1. Use a PEG parser on a schema string,
        2. Separate the output into schemas and typedefs
        3. Insert any concrete implementations of types from fdb_types.py defaulting to generic string type
        4. Walk the schema tree and annotate it with type information.
        """
        m = fdb_schema.match(string)
        g = list(m.groups())
        self._str_types, schemas = post_process(g)
        self.types = {
            key: FDB_type_to_implementation[type]
            for key, type in self._str_types.items()
        }
        self.schemas = determine_types(self.types, schemas)
        self.defaults = defaults

    def __repr__(self):
        return json.dumps(
            dict(schemas=self.schemas, defaults=self.defaults), indent=4, default=repr
        )

    @classmethod
    def consume_key(cls, key_spec: KeySpec, request: dict[str, Any]) -> Key:
        key = key_spec.key
        try:
            value = request[key]
        except KeyError:
            if key_spec.is_optional():
                return Key(key_spec.key, "", key_spec, "Skipped")
            if key_spec.is_allable():
                return Key(key_spec.key, "", key_spec, "Select All")
            else:
                return Key(key_spec.key, "", key_spec, "Key Missing")

        if key_spec.matches(key, value):
            return Key(
                key_spec.key,
                key_spec.type.parse(value),
                key_spec,
                "Matches",
            )
        else:
            return Key(key_spec.key, value, key_spec, "Incorrect Value")

    @classmethod
    def _DFS_match(
        cls, tree: list, request: dict[str, Any]
    ) -> tuple[bool | list, list[Key]]:
        """Do a DFS on the schema tree, returning the deepest matching path
        At each stage return whether we matched on this path, and the path itself.

        When traversing the tree there are three cases to consider:
        1. base case []
        2. one schema [k, k, k, [k, k, k]]
        3. list of schemas [[k,k,k], [k,k,k], [k,k,k]]
        """
        # Case 1: Base Case
        if not tree:
            return True, []

        # Case 2: [k, k, k, [k, k, k]]
        if isinstance(tree[0], KeySpec):
            node, *tree = tree
            # Check if this node is in the request
            match_result = cls.consume_key(node, request)

            # If it isn't then terminate this path here
            if not match_result:
                return False, [match_result,]  # fmt: skip

            # Otherwise continue walking the tree and return the best result
            matched, path = cls._DFS_match(tree, request)

            # Don't put the key in the path if it's optional and we're skipping it.
            if match_result.reason != "Skipped":
                path = [match_result,] + path  # fmt: skip

            return matched, path

        # Case 3: [[k, k, k], [k, k, k]]
        branches = []
        for branch in tree:
            matched, branch_path = cls._DFS_match(branch, request)

            # If this branch matches, terminate the DFS and use this.
            if matched:
                return branch, branch_path
            else:
                branches.append(branch_path)

        # If no branch matches, return the one with the deepest match
        return False, max(branches, key=len)

    @classmethod
    def _DFS_match_all(
        cls, tree: list, request: dict[str, Any]
    ) -> list[list[Key]]:
        """Do a DFS on the schema tree, returning all matching paths or partial matches.
        At each stage return all matching paths and the deepest partial matches.

        When traversing the tree there are three cases to consider:
        1. base case []
        2. one schema [k, k, k, [k, k, k]]
        3. list of schemas [[k,k,k], [k,k,k], [k,k,k]]
        """
        # Case 1: Base Case
        if not tree:
            return [[]]

        # Case 2: [k, k, k, [k, k, k]]
        if isinstance(tree[0], KeySpec):
            node, *tree = tree
            # Check if this node is in the request
            request_values = request.get(node.key, None)

            if request_values is None:
                # If the key is not in the request, return a partial match with Key Missing
                return [[Key(node.key, "", node, "Key Missing")]]

            # If the request value is a list, try to match each value
            if isinstance(request_values, list):
                all_matches = []
                for value in request_values:
                    match_result = cls.consume_key(node, {node.key: value})

                    if match_result:
                        sub_matches = cls._DFS_match_all(tree, request)
                        for match in sub_matches:
                            if match_result.reason != "Skipped":
                                match.insert(0, match_result)
                            all_matches.append(match)

                return all_matches if all_matches else [[Key(node.key, "", node, "No Match Found")]]
            else:
                # Handle a single value
                match_result = cls.consume_key(node, request)

                # If it isn't a match then return a partial match with Key Missing
                if not match_result:
                    return [[Key(node.key, "", node, "Key Missing")]]

                # Continue walking the tree and get all matches
                all_matches = cls._DFS_match_all(tree, request)

                # Prepend the current match to all further matches
                for match in all_matches:
                    if match_result.reason != "Skipped":
                        match.insert(0, match_result)

                return all_matches

        # Case 3: [[k, k, k], [k, k, k]]
        all_branch_matches = []
        for branch in tree:
            branch_matches = cls._DFS_match_all(branch, request)
            all_branch_matches.extend(branch_matches)

        # Return all of the deepest partial matches or complete matches
        return all_branch_matches

    def match_all(self, request: dict[str, Any]):
        request = request | self.defaults
        return self._DFS_match_all(self.schemas, request)

    def match(self, request: dict[str, Any]):
        request = request | self.defaults
        return self._DFS_match(self.schemas, request)


class FDBSchemaFile(FDBSchema):
    def __init__(self, path: str):
        with open(path, "r") as f:
            return super().__init__(f.read())
```
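For reference, a short sketch of how the deleted parser class was driven; the schema text and request values are invented toy data, but the calls (`FDBSchema`, `match`, `Key.info`) are the ones defined above:

```python
# Toy schema text (invented); the classes used below are the ones defined in this file.
toy_schema = """
expver: Expver;
[ class, expver, stream=oper/enfo, date, time
    [ type, levtype
        [ step, levelist?, param ]]]
"""

schema = FDBSchema(toy_schema)
matched, path = schema.match(
    {
        "class": "od", "expver": "0001", "stream": "oper",
        "date": "20240101", "time": "1200",
        "type": "fc", "levtype": "pl",
        "step": "0", "param": "130",
    }
)
for key in path:
    print(key.info())  # e.g. "✅ class       = od           (class)"
```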
Deleted file (83 lines):

```python
from dataclasses import dataclass
from typing import Any
import re
from collections import defaultdict
from datetime import datetime, date, time


@dataclass(repr=False)
class FDBType:
    """
    Holds information about how to format and validate a given FDB Schema type like Time or Expver
    This base type represents a string and does no validation or formatting. It's the default type.
    """

    name: str = "String"

    def __repr__(self) -> str:
        return self.name

    def validate(self, s: Any) -> bool:
        try:
            self.parse(s)
            return True
        except (ValueError, AssertionError):
            return False

    def format(self, s: Any) -> str:
        return str(s).lower()

    def parse(self, s: str) -> Any:
        return s


@dataclass(repr=False)
class Expver_FDBType(FDBType):
    name: str = "Expver"

    def parse(self, s: str) -> str:
        assert bool(re.match(".{4}", s))
        return s


@dataclass(repr=False)
class Time_FDBType(FDBType):
    name: str = "Time"
    time_format = "%H%M"

    def format(self, t: time) -> str:
        return t.strftime(self.time_format)

    def parse(self, s: datetime | str | int) -> time:
        if isinstance(s, str):
            assert len(s) == 4
            return datetime.strptime(s, self.time_format).time()
        if isinstance(s, datetime):
            return s.time()
        return self.parse(f"{s:04}")


@dataclass(repr=False)
class Date_FDBType(FDBType):
    name: str = "Date"
    date_format: str = "%Y%m%d"

    def format(self, d: Any) -> str:
        if isinstance(d, date):
            return d.strftime(self.date_format)
        if isinstance(d, int):
            return f"{d:08}"
        else:
            return d

    def parse(self, s: datetime | str | int) -> date:
        if isinstance(s, str):
            return datetime.strptime(s, self.date_format).date()
        elif isinstance(s, datetime):
            return s.date()
        return self.parse(f"{s:08}")


FDB_type_to_implementation = defaultdict(lambda: FDBType()) | {
    cls.name: cls() for cls in [Expver_FDBType, Time_FDBType, Date_FDBType]
}
```
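A quick illustration of how the deleted type helpers round-trip values (the example values are arbitrary):

```python
# Arbitrary example values demonstrating the parse/format round trip defined above.
d = Date_FDBType()
t = Time_FDBType()

print(d.parse("20250101"))        # datetime.date(2025, 1, 1)
print(d.format(20250101))         # "20250101" (ints are zero-padded to 8 digits)
print(t.parse(930))               # datetime.time(9, 30), via zero-padding to "0930"
print(t.format(t.parse("1200")))  # "1200"
```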
`backend/main.py` (deleted, 130 lines)

```python
from collections import defaultdict
from typing import Any, Dict

import yaml
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fdb_schema import FDBSchemaFile

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.mount("/app", StaticFiles(directory="../webapp"), name="static")


language_yaml = "./language.yaml"

with open(language_yaml, "r") as f:
    mars_language = yaml.safe_load(f)["_field"]

###### Load FDB Schema
schema = FDBSchemaFile("./standard_fdb_schema")
# schema = FDBSchemaFile("./test_schema")


def request_to_dict(request: Request) -> Dict[str, Any]:
    # Convert query parameters to dictionary format
    request_dict = dict(request.query_params)
    for key, value in request_dict.items():
        # Convert comma-separated values into lists
        if "," in value:
            request_dict[key] = value.split(",")
    return request_dict


@app.get("/simple")
async def get_simple(request: Request):
    request_dict = request_to_dict(request)
    print(request_dict)
    target = next((k for k, v in request_dict.items() if v == "????"), None)
    if not target:
        return {"error": "No target found in request, there must be one key with value '????'"}

    current_query_params = "&".join(f"{k}={v}" for k, v in request_dict.items() if k != target)
    if len(current_query_params) > 1:
        current_query_params += "&"

    stac_collection = {
        "type": "Collection",
        "stac_version": "1.0.0",
        "id": target,
        "title": target.capitalize(),
        "key_type": mars_language.get(target, {}).get("type", ""),
        "description": mars_language.get(target, {}).get("description", ""),
        "values": mars_language.get(target, {}).get("values", ""),
        "links": [
            {
                "title": str(value[-1] if isinstance(value, list) else value),
                "href": f"/tree?{current_query_params}{target}={value[0] if isinstance(value, list) else value}",
                "rel": "child",
                "type": "application/json",
            }
            for value in mars_language.get(target, {}).get("values", [])
        ],
    }

    return stac_collection


@app.get("/tree")
async def get_tree(request: Request):
    # Convert query parameters to dictionary format
    request_dict = request_to_dict(request)

    # Run the schema matching logic
    matches = schema.match_all(request_dict)

    # Only take the longest matches
    max_len = max(len(m) for m in matches)
    matches = [m for m in matches if len(m) == max_len]

    # Take the ends of all partial matches, ignore those that are full matches
    # Full matches are indicated by the last key having boolean value True
    key_frontier = defaultdict(list)
    for match in matches:
        if not match[-1]:
            key_frontier[match[-1].key].append([m for m in match[:-1]])

    def make_link(key_name, paths):
        """Take a MARS Key and information about which paths matched up to this point and use it to make a STAC Link"""
        first_path = [str(p) for p in paths[0]]
        href = f"/simple?{'&'.join(first_path)}{'&' if first_path else ''}{key_name}=????"
        optional = [p[-1].key_spec.is_optional() for p in paths if len(p) > 0]
        optional_str = "Yes" if all(optional) and len(optional) > 0 else ("Sometimes" if any(optional) else "No")

        return {
            "title": key_name,
            "optional": optional_str,
            # "optional_by_path": optional,
            "href": href,
            "rel": "child",
            "type": "application/json",
            "paths": set(tuple(f"{m.key}={m.value}" for m in p) for p in paths),
            # "description": mars_language.get(key_name, {}).get("description", ""),
            # "values": mars_language.get(key_name, {}).get("values", "")
        }

    # Format the response as a STAC collection
    stac_collection = {
        "type": "Collection",
        "stac_version": "1.0.0",
        "id": "partial-matches",
        "description": "STAC collection representing potential children of this request",
        "links": [
            make_link(key_name, paths)
            for key_name, paths in key_frontier.items()
        ],
    }

    return stac_collection
```
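A minimal client-side sketch of how the removed `/simple` endpoint was queried; the host, port and query key are hypothetical:

```python
# Hypothetical local call against the removed endpoint; URL and key are invented.
import json
from urllib.request import urlopen

# Ask /simple which values are available for `class` (the target key is marked with "????").
with urlopen("http://localhost:8000/simple?class=????") as resp:
    collection = json.load(resp)

for link in collection["links"]:
    print(link["title"], link["href"])
```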
Deleted file (2 lines):

```
fastapi[standard]
pe
```
@ -1,590 +0,0 @@
|
||||
|
||||
# * Format of the rules is:
|
||||
|
||||
# [a1, a2, a3 ...[b1, b2, b3... [c1, c2, c3...]]]
|
||||
|
||||
# - The first level (a) defines which attributes are used to name the top level directory
|
||||
# - The second level (b) defines which attributes are used to name the data files
|
||||
# - The third level (c) defines which attributes are used as index keys
|
||||
|
||||
# * Rules can be grouped
|
||||
|
||||
# [a1, a2, a3 ...
|
||||
# [b1, b2, b3... [c1, c2, c3...]]
|
||||
# [B1, B2, B3... [C1, C2, C3...]]
|
||||
# ]
|
||||
|
||||
# * A list of values can be given for an attribute
|
||||
# [ ..., stream=enfo/efov, ... ]
|
||||
# This will be used when matching rules.
|
||||
|
||||
# * Attributes can be typed
|
||||
# Globally, at the begining of this file:
|
||||
|
||||
# refdate: Date;
|
||||
|
||||
# or in the context of a rule:
|
||||
# [type=cl, ... [date:ClimateMonth, ...]]
|
||||
|
||||
# Typing attributes is done when the user's requests or the GRIB values need to be modified before directories, files and indexes are created. For example, ClimateMonth will transform 2010-04-01 to 'may' internally.
|
||||
|
||||
# * Attributes can be optional
|
||||
# [ step, levelist?, param ]
|
||||
# They will be replaced internally by an empty value. It is also posiible to provide a default subtitution value: e.g. [domain?g] will consider the domain to be 'g' if missing.
|
||||
|
||||
# * Attributes can be removed:
|
||||
# [grid-]
|
||||
# This is useful to remove attributes present in the GRIB that should not be ignored
|
||||
|
||||
# * Rules are matched:
|
||||
|
||||
# - If the attributes are present in the GRIB/Request, or marked optional or ignored
|
||||
# - If a list of possible value is provided, one of them must match, for example
|
||||
# [ class, expver, stream=enfo/efov, date, time, domain ]
|
||||
# will match either stream=enfo or stream=efov, all other attributes will be matched if they exist in the GRIB or user's request
|
||||
|
||||
# * On archive:
|
||||
# - Attributes are extracted from the GRIB (namespace 'mars'), possibly modified by the attribute type
|
||||
# - Only the first rule is used, so order is important
|
||||
# - All GRIB attributes must be used by the rules, otherwise an error is raised
|
||||
|
||||
# * On retrieve:
|
||||
# - Attributes are extracted from the user's request, possibly modified by the attribute type (e.g. for handling of U/V)
|
||||
# - All the matching rules are considered
|
||||
# - Only attributes listed in the rules are used to extract values from the user's request
|
||||
|
||||
|
||||
# Default types
|
||||
|
||||
param: Param;
|
||||
step: Step;
|
||||
date: Date;
|
||||
hdate: Date;
|
||||
refdate: Date;
|
||||
latitude: Double;
|
||||
longitude: Double;
|
||||
levelist: Double;
|
||||
grid: Grid;
|
||||
expver: Expver;
|
||||
|
||||
time: Time;
|
||||
fcmonth: Integer;
|
||||
|
||||
number: Integer;
|
||||
frequency: Integer;
|
||||
direction: Integer;
|
||||
channel: Integer;
|
||||
|
||||
instrument: Integer;
|
||||
ident: Integer;
|
||||
|
||||
diagnostic: Integer;
|
||||
iteration: Integer;
|
||||
system: Integer;
|
||||
method: Integer;
|
||||
|
||||
# ???????
|
||||
|
||||
# reference: Integer;
|
||||
# fcperiod: Integer;
|
||||
|
||||
# opttime: Integer;
|
||||
# leadtime: Integer;
|
||||
|
||||
# quantile: ??????
|
||||
# range: ??????
|
||||
|
||||
# band: Integer;
|
||||
|
||||
|
||||
########################################################
|
||||
# These rules must be first, otherwise fields of These
|
||||
# classes will be index with the default rule for oper
|
||||
[ class=ti/s2, expver, stream, date, time, model
|
||||
[ origin, type, levtype, hdate?
|
||||
[ step, number?, levelist?, param ]]
|
||||
]
|
||||
|
||||
[ class=ms, expver, stream, date, time, country=de
|
||||
[ domain, type, levtype, dbase, rki, rty, ty
|
||||
[ step, levelist?, param ]]
|
||||
]
|
||||
|
||||
[ class=ms, expver, stream, date, time, country=it
|
||||
[ domain, type, levtype, model, bcmodel, icmodel:First3
|
||||
[ step, levelist?, param ]
|
||||
]
|
||||
]
|
||||
|
||||
[ class=el, expver, stream, date, time, domain
|
||||
[ origin, type, levtype
|
||||
[ step, levelist?, param ]]
|
||||
]
|
||||
|
||||
########################################################
|
||||
# The are the rules matching most of the fields
|
||||
# oper/dcda
|
||||
[ class, expver, stream=oper/dcda/scda, date, time, domain?
|
||||
|
||||
[ type=im/sim
|
||||
[ step?, ident, instrument, channel ]]
|
||||
|
||||
[ type=ssd
|
||||
[ step, param, ident, instrument, channel ]]
|
||||
|
||||
[ type=4i, levtype
|
||||
[ step, iteration, levelist, param ]]
|
||||
|
||||
[ type=me, levtype
|
||||
[ step, number, levelist?, param ]]
|
||||
|
||||
[ type=ef, levtype
|
||||
[ step, levelist?, param, channel? ]]
|
||||
|
||||
[ type=ofb/mfb
|
||||
[ obsgroup, reportype ]]
|
||||
|
||||
[ type, levtype
|
||||
[ step, levelist?, param ]]
|
||||
|
||||
]
|
||||
|
||||
# dcwv/scwv/wave
|
||||
[ class, expver, stream=dcwv/scwv/wave, date, time, domain
|
||||
[ type, levtype
|
||||
[ step, param, frequency?, direction? ]]]
|
||||
|
||||
# enfo
|
||||
[ class, expver, stream=enfo/efov, date, time, domain
|
||||
|
||||
[ type, levtype=dp, product?, section?
|
||||
[ step, number?, levelist?, latitude?, longitude?, range?, param ]]
|
||||
|
||||
[ type=tu, levtype, reference
|
||||
[ step, number, levelist?, param ]]
|
||||
|
||||
[ type, levtype
|
||||
[ step, quantile?, number?, levelist?, param ]]
|
||||
|
||||
]
|
||||
|
||||
# waef/weov
|
||||
[ class, expver, stream=waef/weov, date, time, domain
|
||||
[ type, levtype
|
||||
[ step, number?, param, frequency?, direction? ]]
|
||||
]
|
||||
|
||||
########################################################
|
||||
# enda
|
||||
[ class, expver, stream=enda, date, time, domain
|
||||
|
||||
[ type=ef/em/es/ses, levtype
|
||||
[ step, number?, levelist?, param, channel? ]]
|
||||
|
||||
[ type=ssd
|
||||
[ step, number, param, ident, instrument, channel ]]
|
||||
|
||||
|
||||
[ type, levtype
|
||||
[ step, number?, levelist?, param ]]
|
||||
]
|
||||
|
||||
# ewda
|
||||
[ class, expver, stream=ewda, date, time, domain
|
||||
[ type, levtype
|
||||
[ step, number?, param, frequency?, direction? ]]
|
||||
]
|
||||
|
||||
|
||||
########################################################
|
||||
# elda
|
||||
[ class, expver, stream=elda, date, time, domain?
|
||||
|
||||
[ type=ofb/mfb
|
||||
[ obsgroup, reportype ]]
|
||||
|
||||
[ type, levtype, anoffset
|
||||
[ step, number?, levelist?, iteration?, param, channel? ]]
|
||||
]
|
||||
|
||||
# ewla
|
||||
[ class, expver, stream=ewla, date, time, domain
|
||||
[ type, levtype, anoffset
|
||||
[ step, number?, param, frequency?, direction? ]]
|
||||
]
|
||||
|
||||
########################################################
|
||||
# lwda
|
||||
[ class, expver, stream=lwda, date, time, domain?
|
||||
|
||||
[ type=ssd, anoffset
|
||||
[ step, param, ident, instrument, channel ]]
|
||||
|
||||
[type=me, levtype, anoffset
|
||||
[ number, step, levelist?, param]]
|
||||
|
||||
[ type=4i, levtype, anoffset
|
||||
[ step, iteration, levelist, param ]]
|
||||
|
||||
[ type=ofb/mfb
|
||||
[ obsgroup, reportype ]]
|
||||
|
||||
[ type, levtype, anoffset
|
||||
[ step, levelist?, param]]
|
||||
]
|
||||
|
||||
# lwwv
|
||||
[ class, expver, stream=lwwv, date, time, domain
|
||||
[ type, levtype, anoffset
|
||||
[ step, param, frequency?, direction? ]]
|
||||
]
|
||||
########################################################
|
||||
# amap
|
||||
[ class, expver, stream=amap, date, time, domain
|
||||
[ type, levtype, origin
|
||||
[ step, levelist?, param ]]]
|
||||
|
||||
# maed
|
||||
[ class, expver, stream=maed, date, time, domain
|
||||
[ type, levtype, origin
|
||||
[ step, levelist?, param ]]]
|
||||
|
||||
# mawv
|
||||
[ class, expver, stream=mawv, date, time, domain
|
||||
[ type, levtype, origin
|
||||
[ step, param, frequency?, direction? ]]]
|
||||
|
||||
# cher
|
||||
[ class, expver, stream=cher, date, time, domain
|
||||
[ type, levtype
|
||||
[ step, levelist, param ]]]
|
||||
|
||||
|
||||
# efhc
|
||||
[ class, expver, stream=efhc, refdate, time, domain
|
||||
[ type, levtype, date
|
||||
[ step, number?, levelist?, param ]]]
|
||||
|
||||
# efho
|
||||
[ class, expver, stream=efho, date, time, domain
|
||||
[ type, levtype, hdate
|
||||
[ step, number?, levelist?, param ]]]
|
||||
|
||||
|
||||
# efhs
|
||||
[ class, expver, stream=efhs, date, time, domain
|
||||
[ type, levtype
|
||||
[ step, quantile?, number?, levelist?, param ]]]
|
||||
|
||||
# wehs
|
||||
[ class, expver, stream=wehs, date, time, domain
|
||||
[ type, levtype
|
||||
[ step, quantile?, number?, levelist?, param ]]]
|
||||
|
||||
# kwbc
|
||||
[ class, expver, stream=kwbc, date, time, domain
|
||||
[ type, levtype
|
||||
[ step, number?, levelist?, param ]]]
|
||||
|
||||
# ehmm
|
||||
[ class, expver, stream=ehmm, date, time, domain
|
||||
[ type, levtype, hdate
|
||||
[ fcmonth, levelist?, param ]]]
|
||||
|
||||
|
||||
# ammc/cwao/edzw/egrr/lfpw/rjtd/toga
|
||||
[ class, expver, stream=ammc/cwao/edzw/egrr/lfpw/rjtd/toga/fgge, date, time, domain
|
||||
[ type, levtype
|
||||
[ step, levelist?, param ]]]
|
||||
|
||||
########################################################################
|
||||
|
||||
# enfh
|
||||
[ class, expver, stream=enfh, date, time, domain
|
||||
|
||||
[ type, levtype=dp, hdate, product?, section?
|
||||
[ step, number?, levelist?, latitude?, longitude?, range?, param ]]
|
||||
|
||||
[ type, levtype, hdate
|
||||
[ step, number?, levelist?, param ]]
|
||||
]
|
||||
|
||||
# enwh
|
||||
[ class, expver, stream=enwh, date, time, domain
|
||||
[ type, levtype, hdate
|
||||
[ step, number?, param, frequency?, direction? ]]
|
||||
]
|
||||
|
||||
########################################################################
|
||||
# sens
|
||||
[ class, expver, stream=sens, date, time, domain
|
||||
[ type, levtype
|
||||
[ step, diagnostic, iteration, levelist?, param ]]]
|
||||
|
||||
########################################################################
|
||||
# esmm
|
||||
[ class, expver, stream=esmm, date, time, domain
|
||||
[ type, levtype
|
||||
[ fcmonth, levelist?, param ]]]
|
||||
# ewhc
|
||||
[ class, expver, stream=ewhc, refdate, time, domain
|
||||
[ type, levtype, date
|
||||
[ step, number?, param, frequency?, direction? ]]]
|
||||
|
||||
########################################################################
|
||||
# ewho
|
||||
[ class, expver, stream=ewho, date, time, domain
|
||||
[ type, levtype, hdate
|
||||
[ step, number?, param, frequency?, direction? ]]]
|
||||
|
||||
# mfam
|
||||
[ class, expver, stream=mfam, date, time, domain
|
||||
|
||||
[ type=pb/pd, levtype, origin, system?, method
|
||||
[ fcperiod, quantile, levelist?, param ]]
|
||||
|
||||
[ type, levtype, origin, system?, method
|
||||
[ fcperiod, number?, levelist?, param ]]
|
||||
|
||||
]
|
||||
|
||||
# mfhm
|
||||
[ class, expver, stream=mfhm, refdate, time, domain
|
||||
[ type, levtype, origin, system?, method, date?
|
||||
[ fcperiod, number?, levelist?, param ]]]
|
||||
# mfhw
|
||||
[ class, expver, stream=mfhw, refdate, time, domain
|
||||
[ type, levtype, origin, system?, method, date
|
||||
[ step, number?, param ]]]
|
||||
# mfwm
|
||||
[ class, expver, stream=mfwm, date, time, domain
|
||||
[ type, levtype, origin, system?, method
|
||||
[ fcperiod, number, param ]]]
|
||||
# mhwm
|
||||
[ class, expver, stream=mhwm, refdate, time, domain
|
||||
[ type, levtype, origin, system?, method, date
|
||||
[ fcperiod, number, param ]]]
|
||||
|
||||
# mmsf
|
||||
[ class, expver, stream=mmsf, date, time, domain
|
||||
|
||||
[ type, levtype=dp, origin, product, section, system?, method
|
||||
[ step, number, levelist?, latitude?, longitude?, range?, param ]]
|
||||
|
||||
[ type, levtype, origin, system?, method
|
||||
[ step, number, levelist?, param ]]
|
||||
]
|
||||
|
||||
# mnfc
|
||||
[ class, expver, stream=mnfc, date, time, domain
|
||||
|
||||
[ type, levtype=dp, origin, product, section, system?, method
|
||||
[ step, number?, levelist?, latitude?, longitude?, range?, param ]]
|
||||
|
||||
[ type, levtype, origin, system?, method
|
||||
[ step, number?, levelist?, param ]]
|
||||
]
|
||||
|
||||
# mnfh
|
||||
[ class, expver, stream=mnfh, refdate, time, domain
|
||||
[ type, levtype=dp, origin, product, section, system?, method, date
|
||||
[ step, number?, levelist?, latitude?, longitude?, range?, param ]]
|
||||
[ type, levtype, origin, system?, method, date?
|
||||
[ step, number?, levelist?, param ]]
|
||||
]
|
||||
|
||||
# mnfm
|
||||
[ class, expver, stream=mnfm, date, time, domain
|
||||
[ type, levtype, origin, system?, method
|
||||
[ fcperiod, number?, levelist?, param ]]]
|
||||
|
||||
# mnfw
|
||||
[ class, expver, stream=mnfw, date, time, domain
|
||||
[ type, levtype, origin, system?, method
|
||||
[ step, number?, param ]]]
|
||||
|
||||
# ea/mnth
|
||||
[ class=ea, expver, stream=mnth, date, domain
|
||||
[ type, levtype
|
||||
[ time, step?, levelist?, param ]]]
|
||||
|
||||
# mnth
|
||||
[ class, expver, stream=mnth, domain
|
||||
[ type=cl, levtype
|
||||
[ date: ClimateMonthly, time, levelist?, param ]]
|
||||
[ type, levtype
|
||||
[ date , time, step?, levelist?, param ]]]
|
||||
|
||||
# mofc
|
||||
[ class, expver, stream=mofc, date, time, domain
|
||||
[ type, levtype=dp, product, section, system?, method
|
||||
[ step, number?, levelist?, latitude?, longitude?, range?, param ]]
|
||||
[ type, levtype, system?, method
|
||||
[ step, number?, levelist?, param ]]
|
||||
]
|
||||
|
||||
# mofm
|
||||
[ class, expver, stream=mofm, date, time, domain
|
||||
[ type, levtype, system?, method
|
||||
[ fcperiod, number, levelist?, param ]]]
|
||||
|
||||
# mmsa/msmm
|
||||
[ class, expver, stream=mmsa, date, time, domain
|
||||
[ type, levtype, origin, system?, method
|
||||
[ fcmonth, number?, levelist?, param ]]]
|
||||
|
||||
[ class, expver, stream=msmm, date, time, domain
|
||||
[ type, levtype, origin, system?, method
|
||||
[ fcmonth, number?, levelist?, param ]]]
|
||||
|
||||
# ocea
|
||||
[ class, expver, stream=ocea, date, time, domain
|
||||
[ type, levtype, product, section, system?, method
|
||||
[ step, number, levelist?, latitude?, longitude?, range?, param ]]
|
||||
]
|
||||
|
||||
# seas
|
||||
[ class, expver, stream=seas, date, time, domain
|
||||
|
||||
[ type, levtype=dp, product, section, system?, method
|
||||
[ step, number, levelist?, latitude?, longitude?, range?, param ]]
|
||||
|
||||
[ type, levtype, system?, method
|
||||
[ step, number, levelist?, param ]]
|
||||
]
|
||||
|
||||
# sfmm/smma
|
||||
[ class, expver, stream=sfmm/smma, date, time, domain
|
||||
[ type, levtype, system?, method
|
||||
[ fcmonth, number?, levelist?, param ]]]
|
||||
|
||||
# supd
|
||||
[ class=od, expver, stream=supd, date, time, domain
|
||||
[ type, levtype, origin?, grid
|
||||
[ step, levelist?, param ]]]
|
||||
|
||||
# For era
|
||||
[ class, expver, stream=supd, date, time, domain
|
||||
[ type, levtype, grid- # The minus sign is here to consume 'grid', but don't index it
|
||||
[ step, levelist?, param ]]]
|
||||
|
||||
# swmm
|
||||
[ class, expver, stream=swmm, date, time, domain
|
||||
[ type, levtype, system?, method
|
||||
[ fcmonth, number, param ]]]
|
||||
|
||||
# wamf
|
||||
[ class, expver, stream=wamf, date, time, domain
|
||||
[ type, levtype, system?, method
|
||||
[ step, number?, param ]]]
|
||||
|
||||
# ea/wamo
|
||||
[ class=ea, expver, stream=wamo, date, domain
|
||||
[ type, levtype
|
||||
[ time, step?, param ]]]
|
||||
|
||||
# wamo
|
||||
[ class, expver, stream=wamo, domain
|
||||
[ type=cl, levtype
|
||||
[ date: ClimateMonthly, time, param ]]
|
||||
[ type, levtype
|
||||
[ date, time, step?, param ]]]
|
||||
|
||||
# wamd
|
||||
[ class, expver, stream=wamd, date, domain
|
||||
[ type, levtype
|
||||
[ param ]]]
|
||||
|
||||
# wasf
|
||||
[ class, expver, stream=wasf, date, time, domain
|
||||
[ type, levtype, system?, method
|
||||
[ step, number, param ]]]
|
||||
# wmfm
|
||||
[ class, expver, stream=wmfm, date, time, domain
|
||||
[ type, levtype, system?, method
|
||||
[ fcperiod, number, param ]]]
|
||||
|
||||
# moda
|
||||
[ class, expver, stream=moda, date, domain
|
||||
[ type, levtype
|
||||
[ levelist?, param ]]]
|
||||
|
||||
# msdc/mdfa/msda
|
||||
[ class, expver, stream=msdc/mdfa/msda, domain
|
||||
[ type, levtype
|
||||
[ date, time?, step?, levelist?, param ]]]
|
||||
|
||||
|
||||
|
||||
# seap
|
||||
[ class, expver, stream=seap, date, time, domain
|
||||
[ type=sv/svar, levtype, origin, method?
|
||||
[ step, leadtime, opttime, number, levelist?, param ]]
|
||||
|
||||
[ type=ef, levtype, origin
|
||||
[ step, levelist?, param, channel? ]]
|
||||
|
||||
[ type, levtype, origin
|
||||
[ step, levelist?, param ]]
|
||||
|
||||
]
|
||||
|
||||
[ class, expver, stream=mmaf, date, time, domain
|
||||
[ type, levtype, origin, system?, method
|
||||
[ step, number, levelist?, param ]]
|
||||
]
|
||||
|
||||
[ class, expver, stream=mmam, date, time, domain
|
||||
[ type, levtype, origin, system?, method
|
||||
[ fcmonth, number, levelist?, param ]]
|
||||
]
|
||||
|
||||
|
||||
[ class, expver, stream=dacl, domain
|
||||
[ type=pb, levtype
|
||||
[ date: ClimateDaily, time, step, quantile, levelist?, param ]]
|
||||
[ type, levtype
|
||||
[ date: ClimateDaily, time, step, levelist?, param ]]
|
||||
|
||||
]
|
||||
|
||||
[ class, expver, stream=dacw, domain
|
||||
[ type=pb, levtype
|
||||
[ date: ClimateDaily, time, step, quantile, param ]]
|
||||
[ type, levtype
|
||||
[ date: ClimateDaily, time, step, param ]]
|
||||
|
||||
]
|
||||
|
||||
[ class, expver, stream=edmm/ewmm, date, time, domain
|
||||
[ type=ssd
|
||||
[ step, number, param, ident, instrument, channel ]]
|
||||
[ type, levtype
|
||||
[ step, number, levelist?, param ]]
|
||||
]
|
||||
|
||||
[ class, expver, stream=edmo/ewmo, date, domain
|
||||
[ type, levtype
|
||||
[ number, levelist?, param ]]
|
||||
]
|
||||
|
||||
# stream gfas
|
||||
[ class=mc/rd, expver, stream=gfas, date, time, domain
|
||||
[ type=ga, levtype
|
||||
[ step, param ]]
|
||||
|
||||
[ type=gsd
|
||||
[ param, ident, instrument ]]
|
||||
|
||||
]
|
||||
|
||||
# class is e2
|
||||
[ class, expver, stream=espd, date, time, domain
|
||||
[ type, levtype, origin, grid
|
||||
[ step, number, levelist?, param ]]]
|
||||
|
||||
[ class=cs, expver, stream, date:Default, time, domain
|
||||
[ type, levtype
|
||||
[ step, levelist?, param ]]]
|
||||
|
||||
|
@ -1,11 +0,0 @@
|
||||
[ class=od, stream, date, time
|
||||
[ domain, type, levtype, dbase, rki, rty, ty
|
||||
[ step, levelist?, param ]]
|
||||
]
|
||||
|
||||
[ class=ensemble, number, stream, date, time,
|
||||
[ domain, type, levtype, dbase, rki, rty, ty
|
||||
[ step, levelist?, param ]]
|
||||
]
|
||||
|
||||
[ class, foo]
|
@ -1,156 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "2f01a012-002a-465c-9b09-681bdb3fc26d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"class\n",
|
||||
"type\n",
|
||||
"stream\n",
|
||||
"expver\n",
|
||||
"dataset\n",
|
||||
"model\n",
|
||||
"repres\n",
|
||||
"obsgroup\n",
|
||||
"reportype\n",
|
||||
"levtype\n",
|
||||
"levelist\n",
|
||||
"param\n",
|
||||
"date\n",
|
||||
"year\n",
|
||||
"month\n",
|
||||
"hdate\n",
|
||||
"offsetdate\n",
|
||||
"fcmonth\n",
|
||||
"fcperiod\n",
|
||||
"time\n",
|
||||
"offsettime\n",
|
||||
"step\n",
|
||||
"anoffset\n",
|
||||
"reference\n",
|
||||
"number\n",
|
||||
"quantile\n",
|
||||
"domain\n",
|
||||
"frequency\n",
|
||||
"direction\n",
|
||||
"diagnostic\n",
|
||||
"iteration\n",
|
||||
"channel\n",
|
||||
"ident\n",
|
||||
"instrument\n",
|
||||
"method\n",
|
||||
"origin\n",
|
||||
"system\n",
|
||||
"activity\n",
|
||||
"experiment\n",
|
||||
"generation\n",
|
||||
"realization\n",
|
||||
"resolution\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"language_yaml = \"./language.yaml\"\n",
|
||||
"import yaml\n",
|
||||
"\n",
|
||||
"with open(language_yaml, \"r\") as f:\n",
|
||||
" mars_language = yaml.safe_load(f)[\"_field\"]\n",
|
||||
"\n",
|
||||
"for k in mars_language.keys(): print(k)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "be9074a8-a56f-4fd0-a466-de8904faaa1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"id": "9dd26fe4-5da5-48a5-9e43-83ac1085f7e6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"([Key(key='class', value='od', key_spec=class=od, reason='Matches'),\n",
|
||||
" Key(key='stream', value=5, key_spec=stream, reason='Matches'),\n",
|
||||
" Key(key='date', value='', key_spec=date, reason='Key Missing')],\n",
|
||||
" [Key(key='class', value='ensemble', key_spec=class=ensemble, reason='Matches'),\n",
|
||||
" Key(key='number', value='2', key_spec=number, reason='Matches'),\n",
|
||||
" Key(key='stream', value=5, key_spec=stream, reason='Matches'),\n",
|
||||
" Key(key='date', value='', key_spec=date, reason='Key Missing')])"
|
||||
]
|
||||
},
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from fdb_schema import FDBSchemaFile\n",
|
||||
"schema = FDBSchemaFile(\"./test_schema\")\n",
|
||||
"\n",
|
||||
"r = {\n",
|
||||
" \"class\" : [\"ensemble\", \"od\"],\n",
|
||||
" \"number\" : \"2\",\n",
|
||||
" \"stream\" : 5,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"a, b = schema.match_all(r)\n",
|
||||
"a, b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f46268e3-e197-47b9-bb6e-94f06e0bf648",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"([],\n",
|
||||
" [[Key(key='class', value='od', key_spec=class=od, reason='Matches'),\n",
|
||||
" Key(key='stream', value=5, key_spec=stream, reason='Matches'),\n",
|
||||
" Key(key='date', value='', key_spec=date, reason='Key Missing')],\n",
|
||||
" \n",
|
||||
" [Key(key='class', value='ensemble', key_spec=class=ensemble, reason='Matches'),\n",
|
||||
" Key(key='number', value='2', key_spec=number, reason='Matches'),\n",
|
||||
" Key(key='stream', value=5, key_spec=stream, reason='Matches'),\n",
|
||||
" Key(key='date', value='', key_spec=date, reason='Key Missing')]])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python [conda env:micromamba-ionbeam]",
|
||||
"language": "python",
|
||||
"name": "conda-env-micromamba-ionbeam-py"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
23
chart/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
6
chart/Chart.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
apiVersion: v2
|
||||
name: qubed
|
||||
description: A Helm chart for the STAC Server with frontend, STAC API and caching service.
|
||||
type: application
|
||||
version: 0.1.0
|
||||
appVersion: "0.1.0"
|
25
chart/templates/ingress.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
{{- if .Values.ingress.enabled }}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: stac-server-ingress
|
||||
spec:
|
||||
ingressClassName: nginx
|
||||
rules:
|
||||
- host: {{ .Values.ingress.hostname }}
|
||||
http:
|
||||
paths:
|
||||
{{- if .Values.stacServer.enabled }}
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: stac-server
|
||||
port:
|
||||
number: {{ .Values.stacServer.servicePort }}
|
||||
{{- end }}
|
||||
tls:
|
||||
- hosts:
|
||||
- {{ .Values.ingress.hostname }}
|
||||
secretName: {{ .Values.ingress.tlsSecretName }}
|
||||
{{- end }}
|
43
chart/templates/stac-server-deployment.yaml
Normal file
@ -0,0 +1,43 @@
|
||||
# templates/stac-server-deployment.yaml
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: stac-server
|
||||
spec:
|
||||
replicas: {{ .Values.stacServer.replicas }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: stac-server
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: stac-server
|
||||
spec:
|
||||
containers:
|
||||
- name: stac-server
|
||||
image: "{{ .Values.stacServer.image.repository }}:{{ .Values.stacServer.image.tag }}"
|
||||
imagePullPolicy: {{ .Values.stacServer.image.pullPolicy }}
|
||||
env:
|
||||
- name: API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: api-key
|
||||
key: API_KEY
|
||||
- name: API_URL
|
||||
value: "https://{{ .Values.ingress.hostname }}/api/v1/"
|
||||
ports:
|
||||
- containerPort: {{ .Values.stacServer.servicePort }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: stac-server
|
||||
spec:
|
||||
selector:
|
||||
app: stac-server
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: {{ .Values.stacServer.servicePort }}
|
||||
targetPort: {{ .Values.stacServer.servicePort }}
|
||||
type: ClusterIP
|
13
chart/values.yaml
Normal file
@ -0,0 +1,13 @@
|
||||
stacServer:
|
||||
enabled: true
|
||||
replicas: 1
|
||||
image:
|
||||
repository: "eccr.ecmwf.int/qubed/stac_server"
|
||||
tag: "latest"
|
||||
pullPolicy: Always
|
||||
servicePort: 80
|
||||
|
||||
ingress:
|
||||
enabled: True
|
||||
tlsSecretName: "lumi-wildcard-tls"
|
||||
hostname: "qubed.lumi.apps.dte.destination-earth.eu"
|
38
compose.yaml
@ -1,20 +1,32 @@
|
||||
|
||||
services:
|
||||
backend:
|
||||
# STAC Server
|
||||
stac_server:
|
||||
# image: stac-server:latest
|
||||
container_name: stac_server
|
||||
build:
|
||||
context: ./backend
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
target: stac_server
|
||||
ports:
|
||||
- "8000:8000"
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
|
||||
web:
|
||||
image: nginx
|
||||
ports:
|
||||
- "8123:80"
|
||||
- "8124:80"
|
||||
environment:
|
||||
- NGINX_HOST=localhost
|
||||
- NGINX_PORT=80
|
||||
- CONFIG_DIR=/config
|
||||
volumes:
|
||||
- ./webapp:/usr/share/nginx/html
|
||||
- ./stac_server:/code/stac_server
|
||||
# restart: always
|
||||
|
||||
web_query_builder:
|
||||
# image: web_query_builder:latest
|
||||
container_name: web_query_builder
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
target: web_query_builder
|
||||
ports:
|
||||
- "8125:80"
|
||||
environment:
|
||||
- API_URL=http://127.0.0.1:8124/api/v1/stac/climate-dt
|
||||
volumes:
|
||||
- ./web_query_builder:/code/web_query_builder
|
||||
restart: always
|
||||
|
6
config/config-climate-dt.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
---
|
||||
type: remote
|
||||
host: databridge-prod-catalogue3-ope.ewctest.link
|
||||
port: 10000
|
||||
engine: remote
|
||||
store: remote
|
6
config/config-extremes-dt.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
---
|
||||
type: remote
|
||||
host: databridge-prod-catalogue1-ope.ewctest.link
|
||||
port: 10000
|
||||
engine: remote
|
||||
store: remote
|
@ -1,11 +1,18 @@
|
||||
---
|
||||
_field: &_field
|
||||
|
||||
foo:
|
||||
description: A foo field
|
||||
values:
|
||||
- bar
|
||||
- baz
|
||||
|
||||
|
||||
# type:
|
||||
# values:
|
||||
# - [fc, ]
|
||||
|
||||
|
||||
|
||||
|
||||
levtype:
|
||||
values:
|
||||
- [pl, ]
|
||||
|
||||
class:
|
||||
description: Class selects the main category of data to be retrieved such as operational, research or AIFS
|
||||
@ -14,79 +21,11 @@ _field: &_field
|
||||
flatten: false
|
||||
type: enum
|
||||
values:
|
||||
- [ai, operational aifs]
|
||||
# - [at, austria]
|
||||
# - [be, belgium]
|
||||
# - [c3, c3s]
|
||||
# - [ce, cems]
|
||||
# - [ch, switzerland]
|
||||
# - [ci, cerise]
|
||||
# - [co, cosmo]
|
||||
# - [cr, cams research]
|
||||
# - [cs, ecsn]
|
||||
# - [d1, destine]
|
||||
# - [de, germany]
|
||||
# - [dk, denmark]
|
||||
# - [dm, demeter]
|
||||
# - [dt, dts]
|
||||
# - [e2, e20c]
|
||||
# - [e4, reanalyse40]
|
||||
# - [e6, era6]
|
||||
# - [ea, era5, esat]
|
||||
# - [ed, eerie]
|
||||
# - [ef, efas]
|
||||
# - [ei, era interim]
|
||||
# - [el, eldas]
|
||||
# - [em, e20cm]
|
||||
# - [en, ensembles]
|
||||
# - [ep, cera-20c, cera20c]
|
||||
# - [er, reanalyse]
|
||||
# - [es, spain]
|
||||
# - [et, cera-sat, cerasat]
|
||||
# - [fi, finland]
|
||||
# - [fr, france]
|
||||
# - [gf, glofas]
|
||||
# - [gg, greenhouse gases]
|
||||
# - [gr, greece]
|
||||
# - [gw, global wildfire information system]
|
||||
# - [hr, croatia]
|
||||
# - [hu, hungary]
|
||||
# - [ie, ireland]
|
||||
# - [is, iceland]
|
||||
# - [it, italy]
|
||||
# - [j5, jra55]
|
||||
# - [l5, era5l]
|
||||
# - [l6, era6l]
|
||||
# - [la, aladin-laef, laef, lace]
|
||||
# - [lw, WMO lead centre wave forecast verification]
|
||||
# - [ma, metaps]
|
||||
# - [mc, macc]
|
||||
# - [me, mersea]
|
||||
# - [ml, machine learning]
|
||||
# - [ms, member states]
|
||||
# - [nl, netherlands]
|
||||
# - ['no', norway]
|
||||
# - [nr, ncep 20cr, 20cr]
|
||||
# - [o6, 'ocean 6 reanalysis']
|
||||
- [od, operations]
|
||||
# - [pe, permanent experiment]
|
||||
# - [pt, portugal]
|
||||
# - [pv, provost]
|
||||
- [rd, research]
|
||||
# - [rm, euro4m]
|
||||
# - [rr, regional reanalysis]
|
||||
# - [s2, s2s]
|
||||
# - [se, sweden]
|
||||
# - [si, slovenia]
|
||||
# - [sr, sreps]
|
||||
# - [te, test]
|
||||
# - [to, tost]
|
||||
# - [tr, turkey]
|
||||
# - [uk, united kingdom]
|
||||
# - [ul, ulysses]
|
||||
# - [ur, uerra]
|
||||
# - [yp, yopp]
|
||||
# - [yt, yotc]
|
||||
# - [ai, operational aifs]
|
||||
- [d1, Destine data]
|
||||
# - [od, operations]
|
||||
# - [rd, research]
|
||||
|
||||
|
||||
type:
|
||||
category: data
|
||||
@ -195,11 +134,17 @@ _field: &_field
|
||||
flatten: false
|
||||
type: enum
|
||||
values:
|
||||
- [clte, climate, Climate run output] # climate-dt
|
||||
- [oper, da, daily archive, atmospheric model] # climate-dt / extremes-dt / on-demand-extremes-dt
|
||||
- [wave, wv, wave model] # climate-dt / extremes-dt
|
||||
- [lwda, long window daily archive] # extremes-dt
|
||||
- [lwwv, long window wave] # extremes-dt
|
||||
- [clmn, climate-monthly, Climate run monthly means output] # climate-dt
|
||||
|
||||
# - [amap, analysis for multianalysis project]
|
||||
# - [ammc, melbourne]
|
||||
# - [cher, ch, chernobyl]
|
||||
# - [clte, climate, Climate run output]
|
||||
# - [clmn, climate-monthly, Climate run monthly means output]
|
||||
|
||||
# - [cnrm, meteo france climate centre]
|
||||
# - [cwao, montreal]
|
||||
# - [dacl, daily climatology]
|
||||
@ -227,7 +172,7 @@ _field: &_field
|
||||
# - [elda, ensemble long window data assimilation]
|
||||
# - [enda, ensemble data assimilation]
|
||||
# - [enfh, ensemble forecast hindcasts]
|
||||
- [enfo, ef, ensemble prediction system]
|
||||
# - [enfo, ef, ensemble prediction system]
|
||||
# - [enwh, ensemble forecast wave hindcasts]
|
||||
# - [esmm, combined multi-model monthly means]
|
||||
# - [espd, ensemble supplementary data]
|
||||
@ -244,8 +189,6 @@ _field: &_field
|
||||
# - [gfra, global fire assimilation system reanalysis]
|
||||
# - [kwbc, washington]
|
||||
# - [lfpw, paris, toulouse]
|
||||
# - [lwda, long window daily archive]
|
||||
# - [lwwv, long window wave]
|
||||
# - [ma, means archive]
|
||||
# - [maed, multianalysis ensemble data]
|
||||
# - [mawm, wave anomaly means]
|
||||
@ -282,7 +225,6 @@ _field: &_field
|
||||
# - [ocda, ocean data assimilation]
|
||||
# - [ocea, ocean]
|
||||
# - [olda, ocean Long window data assimilation]
|
||||
- [oper, da, daily archive, atmospheric model]
|
||||
# - [rjtd, tokyo]
|
||||
# - [scda, atmospheric model (short cutoff)]
|
||||
# - [scwv, wave model (short cutoff)]
|
||||
@ -301,7 +243,6 @@ _field: &_field
|
||||
# - [wamo, wave monthly means]
|
||||
# - [wams, multi-model seasonal forecast wave]
|
||||
# - [wasf, wave seasonal forecast]
|
||||
# - [wave, wv, wave model]
|
||||
# - [wavm, wave model (standalone)]
|
||||
# - [weef, wave extended ensemble forecast]
|
||||
# - [weeh, wave extended ensemble forecast hindcast]
|
||||
@ -324,11 +265,16 @@ _field: &_field
|
||||
values:
|
||||
- ['0001', 'Operational Data']
|
||||
- ['xxxx', 'Experimental Data']
|
||||
- ['xxxy', 'Experimental Data']
|
||||
|
||||
dataset:
|
||||
# flatten: false
|
||||
description: The dataset; for DestinE this is one of climate-dt, extremes-dt, on-demand-extremes-dt
|
||||
multiple: true
|
||||
type: any
|
||||
type: enum
|
||||
values:
|
||||
- [climate-dt, Climate Data]
|
||||
- [extremes-dt, Extremes Data]
|
||||
- [on-demand-extremes-dt, On-Demand Extremes Data]
|
||||
|
||||
model:
|
||||
category: data
|
||||
@ -469,7 +415,27 @@ _field: &_field
|
||||
never:
|
||||
- levtype: [sfc, o2d]
|
||||
- type: ssd
|
||||
type: to-by-list-float
|
||||
type: enum
|
||||
values:
|
||||
- [1, ]
|
||||
- [5, ]
|
||||
- [10, ]
|
||||
- [20, ]
|
||||
- [30, ]
|
||||
- [50, ]
|
||||
- [70, ]
|
||||
- [100, ]
|
||||
- [150, ]
|
||||
- [200, ]
|
||||
- [250, ]
|
||||
- [300, ]
|
||||
- [400, ]
|
||||
- [500, ]
|
||||
- [600, ]
|
||||
- [700, ]
|
||||
- [850, ]
|
||||
- [925, ]
|
||||
- [1000, ]
|
||||
|
||||
param:
|
||||
category: data
|
||||
@ -478,6 +444,17 @@ _field: &_field
|
||||
type: param
|
||||
never:
|
||||
- type: [tf, ob]
|
||||
values:
|
||||
- [60, "Potential vorticity"]
|
||||
- [129, "Geopotential"]
|
||||
- [130, "Temperature"]
|
||||
- [131, "U component of wind"]
|
||||
- [132, "V component of wind"]
|
||||
- [133, "Specific humidity"]
|
||||
- [135, "Vertical velocity"]
|
||||
- [157, "Relative humidity"]
|
||||
- [246, "Specific cloud liquid water content"]
|
||||
|
||||
|
||||
#################################################################
|
||||
|
||||
@ -488,8 +465,10 @@ _field: &_field
|
||||
date:
|
||||
category: data
|
||||
default: 0
|
||||
type: date
|
||||
type: enum
|
||||
multiple: true
|
||||
values:
|
||||
- [20211021, ]
|
||||
|
||||
year:
|
||||
category: data
|
||||
@ -553,7 +532,32 @@ _field: &_field
|
||||
category: data
|
||||
default: '1200'
|
||||
multiple: true
|
||||
type: time
|
||||
type: enum
|
||||
values:
|
||||
- ["0000", ]
|
||||
- ["0100", ]
|
||||
- ["0200", ]
|
||||
- ["0300", ]
|
||||
- ["0400", ]
|
||||
- ["0500", ]
|
||||
- ["0600", ]
|
||||
- ["0700", ]
|
||||
- ["0800", ]
|
||||
- ["0900", ]
|
||||
- ["1000", ]
|
||||
- ["1100", ]
|
||||
- ["1200", ]
|
||||
- ["1300", ]
|
||||
- ["1400", ]
|
||||
- ["1500", ]
|
||||
- ["1600", ]
|
||||
- ["1700", ]
|
||||
- ["1800", ]
|
||||
- ["1900", ]
|
||||
- ["2000", ]
|
||||
- ["2100", ]
|
||||
- ["2200", ]
|
||||
- ["2300", ]
|
||||
|
||||
offsettime:
|
||||
category: data
|
||||
@ -747,25 +751,42 @@ _field: &_field
|
||||
#######################################################################
|
||||
# DestinE ClimateDT related keywords
|
||||
|
||||
|
||||
model:
|
||||
type: enum
|
||||
description: Which climate model to use.
|
||||
values:
|
||||
- [ifs-fesom, Integrated Forecast System - FESOM]
|
||||
|
||||
activity:
|
||||
category: data
|
||||
type: lowercase
|
||||
type: enum
|
||||
values:
|
||||
- [story-nudging, ]
|
||||
|
||||
experiment:
|
||||
category: data
|
||||
type: lowercase
|
||||
type: enum
|
||||
values:
|
||||
- [tplus2.0k, ]
|
||||
|
||||
generation:
|
||||
category: data
|
||||
type: integer
|
||||
type: enum
|
||||
values:
|
||||
- [1, ]
|
||||
|
||||
realization:
|
||||
category: data
|
||||
type: integer
|
||||
values:
|
||||
- [1, ]
|
||||
|
||||
resolution:
|
||||
category: data
|
||||
type: lowercase
|
||||
type: enum
|
||||
values:
|
||||
- [standard, ]
|
||||
|
||||
#######################################################################
|
||||
|
18996
config/language/paramids.yaml
Normal file
File diff suppressed because it is too large
36
dockerfile
Normal file
@ -0,0 +1,36 @@
|
||||
FROM python:3.12-slim AS base
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
curl \
|
||||
openssh-client \
|
||||
git \
|
||||
&& apt-get clean
|
||||
|
||||
RUN pip install uv
|
||||
|
||||
# Allows cloning private repos using RUN --mount=type=ssh git clone
|
||||
RUN mkdir -p -m 0600 ~/.ssh && \
|
||||
ssh-keyscan -H github.com >> ~/.ssh/known_hosts
|
||||
|
||||
# Get Rust
|
||||
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
WORKDIR /code
|
||||
|
||||
FROM base AS stac_server
|
||||
|
||||
COPY stac_server/requirements.txt /code/requirements.txt
|
||||
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
||||
|
||||
COPY ./src /code/qubed/src
|
||||
COPY ./pyproject.toml /code/qubed/
|
||||
COPY ./Cargo.toml /code/qubed/
|
||||
COPY ./README.md /code/qubed/
|
||||
|
||||
RUN pip install --no-cache-dir -e /code/qubed
|
||||
COPY ./stac_server /code/stac_server
|
||||
|
||||
WORKDIR /code/stac_server
|
||||
CMD ["fastapi", "dev", "main.py", "--proxy-headers", "--port", "80", "--host", "0.0.0.0"]
|
20
docs/Makefile
Normal file
@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = _build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
159
docs/_static/banner.svg
vendored
Normal file
@ -0,0 +1,159 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
width="1000"
|
||||
height="200"
|
||||
viewBox="0 0 264.58333 52.916666"
|
||||
version="1.1"
|
||||
id="svg5"
|
||||
xml:space="preserve"
|
||||
inkscape:version="1.2.2 (b0a84865, 2022-12-01)"
|
||||
sodipodi:docname="banner.svg"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"><sodipodi:namedview
|
||||
id="namedview7"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#000000"
|
||||
borderopacity="0.25"
|
||||
inkscape:showpageshadow="2"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pagecheckerboard="0"
|
||||
inkscape:deskcolor="#d1d1d1"
|
||||
inkscape:document-units="mm"
|
||||
showgrid="false"
|
||||
inkscape:zoom="1.4221154"
|
||||
inkscape:cx="509.80392"
|
||||
inkscape:cy="23.908046"
|
||||
inkscape:window-width="2665"
|
||||
inkscape:window-height="1000"
|
||||
inkscape:window-x="96"
|
||||
inkscape:window-y="35"
|
||||
inkscape:window-maximized="0"
|
||||
inkscape:current-layer="g330" /><defs
|
||||
id="defs2"><rect
|
||||
x="641.41612"
|
||||
y="32.816639"
|
||||
width="73.588826"
|
||||
height="29.833308"
|
||||
id="rect2775" /><rect
|
||||
x="500.20513"
|
||||
y="263.52755"
|
||||
width="244.63313"
|
||||
height="143.19988"
|
||||
id="rect2749" /><rect
|
||||
x="467.38849"
|
||||
y="331.14972"
|
||||
width="258.55534"
|
||||
height="132.261"
|
||||
id="rect2743" /><rect
|
||||
x="80.859469"
|
||||
y="61.833711"
|
||||
width="299.65568"
|
||||
height="114.15454"
|
||||
id="rect242" /><rect
|
||||
x="61.833711"
|
||||
y="66.590151"
|
||||
width="313.925"
|
||||
height="114.15454"
|
||||
id="rect236" /></defs><g
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"><text
|
||||
xml:space="preserve"
|
||||
transform="scale(0.26458333)"
|
||||
id="text234"
|
||||
style="font-weight:500;font-size:20px;line-height:1.2;font-family:Futura;-inkscape-font-specification:'Futura, Medium';white-space:pre;shape-inside:url(#rect236);display:inline;fill:#000000;stroke-width:0.926667;stroke-miterlimit:4.9;stroke-dasharray:2.78, 0.926667;stroke-opacity:0.40146" /><text
|
||||
xml:space="preserve"
|
||||
transform="scale(0.26458333)"
|
||||
id="text240"
|
||||
style="font-weight:500;font-size:20px;line-height:1.2;font-family:Futura;-inkscape-font-specification:'Futura, Medium';white-space:pre;shape-inside:url(#rect242);display:inline;fill:#000000;stroke-width:0.926667;stroke-miterlimit:4.9;stroke-dasharray:2.78, 0.926667;stroke-opacity:0.40146" /><text
|
||||
xml:space="preserve"
|
||||
transform="scale(0.26458333)"
|
||||
id="text2741"
|
||||
style="font-style:normal;font-variant:normal;font-weight:500;font-stretch:normal;font-size:20px;line-height:1.15;font-family:Futura;-inkscape-font-specification:'Futura, Medium';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect2743);display:inline;fill:#000000;stroke-width:0.926667;stroke-miterlimit:4.9;stroke-dasharray:2.78, 0.926667;stroke-opacity:0.40146" /><text
|
||||
xml:space="preserve"
|
||||
transform="scale(0.26458333)"
|
||||
id="text2747"
|
||||
style="font-style:normal;font-variant:normal;font-weight:500;font-stretch:normal;font-size:20px;line-height:1.15;font-family:Futura;-inkscape-font-specification:'Futura, Medium';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect2749);display:inline;fill:#000000;stroke-width:0.926667;stroke-miterlimit:4.9;stroke-dasharray:2.78, 0.926667;stroke-opacity:0.40146" /><text
|
||||
xml:space="preserve"
|
||||
transform="scale(0.26458333)"
|
||||
id="text2773"
|
||||
style="font-style:normal;font-variant:normal;font-weight:500;font-stretch:normal;font-size:26.6667px;line-height:1.15;font-family:Futura;-inkscape-font-specification:'Futura, Medium';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect2775);display:inline;fill:#000000;stroke-width:0.926667;stroke-miterlimit:4.9;stroke-dasharray:2.78, 0.926667;stroke-opacity:0.40146" /><g
|
||||
id="g349"
|
||||
transform="translate(-5.8208336)"><text
|
||||
xml:space="preserve"
|
||||
style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;line-height:1.15;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;stroke-width:0.245181;stroke-miterlimit:4.9;stroke-dasharray:0.735542, 0.245181;stroke-opacity:0.40146"
|
||||
x="114.48351"
|
||||
y="14.228302"
|
||||
id="text2763"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan2761"
|
||||
style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.245181"
|
||||
x="114.48351"
|
||||
y="14.228302">root</tspan><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.245181"
|
||||
x="114.48351"
|
||||
y="21.701376"
|
||||
id="tspan2765">├── class=cd, stream=lwda/oai, param=1/2/3</tspan><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.245181"
|
||||
x="114.48351"
|
||||
y="29.17445"
|
||||
id="tspan2767">├── class=od, expver=1/2, param=1/2</tspan><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.245181"
|
||||
x="114.48351"
|
||||
y="36.647522"
|
||||
id="tspan2771">├── class=rd, param=1/2/3</tspan><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.245181"
|
||||
x="114.48351"
|
||||
y="44.120598"
|
||||
id="tspan2769">└── ...</tspan></text><g
|
||||
id="g330"><text
|
||||
xml:space="preserve"
|
||||
style="font-weight:500;font-size:14.1111px;line-height:0;font-family:Futura;-inkscape-font-specification:'Futura, Medium';white-space:pre;inline-size:112.115;display:inline;fill:#000000;stroke-width:0.245181;stroke-miterlimit:4.9;stroke-dasharray:0.735542, 0.245181;stroke-opacity:0.40146"
|
||||
x="5.439929"
|
||||
y="17.022402"
|
||||
id="text248"
|
||||
transform="translate(0,-1.5875)"><tspan
|
||||
x="5.439929"
|
||||
y="17.022402"
|
||||
id="tspan532"><tspan
|
||||
style="font-size:12.3472px;line-height:1.2"
|
||||
id="tspan530">Qube</tspan></tspan></text><text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:500;font-stretch:normal;font-size:5.29167px;line-height:1.15;font-family:Futura;-inkscape-font-specification:'Futura, Medium';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;inline-size:87.6248;display:inline;fill:#000000;stroke-width:0.245181;stroke-miterlimit:4.9;stroke-dasharray:0.735542, 0.245181;stroke-opacity:0.40146"
|
||||
x="4.209815"
|
||||
y="40.519432"
|
||||
id="text2755"
|
||||
transform="translate(1.744648,-4.9844494)"><tspan
|
||||
x="4.209815"
|
||||
y="40.519432"
|
||||
id="tspan534">1. A data structure for efficiently </tspan><tspan
|
||||
x="4.209815"
|
||||
y="46.604852"
|
||||
id="tspan536">representing and querying complex </tspan><tspan
|
||||
x="4.209815"
|
||||
y="52.690271"
|
||||
id="tspan538">tree-like datacubes.</tspan></text><text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:500;font-stretch:normal;font-size:5.29167px;line-height:1.15;font-family:Futura;-inkscape-font-specification:'Futura, Medium';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;stroke-width:0.245181;stroke-miterlimit:4.9;stroke-dasharray:0.735542, 0.245181;stroke-opacity:0.40146"
|
||||
x="5.4673572"
|
||||
y="26.586193"
|
||||
id="text2759"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan2757"
|
||||
style="font-style:normal;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.245181"
|
||||
x="5.4673572"
|
||||
y="26.586193">[kjuːb] <tspan
|
||||
style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal"
|
||||
id="tspan495">noun</tspan></tspan></text><path
|
||||
style="fill:#000000;stroke:#000000;stroke-width:0.445;stroke-miterlimit:4.9;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
|
||||
d="M 6.0516036,18.417924 H 92.221177"
|
||||
id="path2833"
|
||||
sodipodi:nodetypes="cc" /></g></g></g></svg>
|
After Width: | Height: | Size: 10 KiB |
86
docs/algorithms.md
Normal file
@ -0,0 +1,86 @@
|
||||
---
|
||||
jupytext:
|
||||
text_representation:
|
||||
extension: .md
|
||||
format_name: myst
|
||||
format_version: 0.13
|
||||
jupytext_version: 1.16.4
|
||||
---
|
||||
# Under the Hood
|
||||
|
||||
## Set Operations
|
||||
|
||||
Qubes represent sets of objects, so the familiar set operations:
|
||||
* Union `A | B` or `Qube.union(A, B)`
|
||||
* Intersection `A & B` or `Qube.intersection(A, B)`
|
||||
* Difference (both `A - B` or `B - A`) or `Qube.difference(A, B)`
|
||||
* Symmetric difference `A ^ B` or `Qube.symmetric_difference(A, B)`
|
||||
|
||||
are all defined.
|
||||
|
||||
We can implement these operations by breaking the problem down into a recursive function:
|
||||
|
||||
```python
|
||||
def operation(A : Qube, B : Qube) -> Qube:
|
||||
...
|
||||
```
|
||||
|
||||
Consider the intersection of A and B:
|
||||
```
|
||||
A
|
||||
├─── a=1, b=1/2/3, c=1
|
||||
└─── a=2, b=1/2/3, c=1
|
||||
|
||||
B
|
||||
├─── a=1, b=3/4/5, c=2
|
||||
└─── a=2, b=3/4/5, c=2
|
||||
```
|
||||
|
||||
We pair the two trees and traverse them in tandem; at each level we group the nodes by key and, for every pair of nodes in a group, compute the values only in A, the values only in B, and the values in both:
|
||||
```
|
||||
for node_a in level_A:
|
||||
for node_b in level_B:
|
||||
just_A, intersection, just_B = Qube.fused_set_operations(
|
||||
node_a.values,
|
||||
node_b.values
|
||||
)
|
||||
```
|
||||
|
||||
Based on the particular operation we're computing we keep or discard these three objects:
|
||||
* Union: keep just_A, intersection, just_B
|
||||
* Intersection: keep intersection
|
||||
* A - B: keep just_A; B - A: keep just_B
|
||||
* Symmetric difference: keep just_A and just_B but not intersection
|
||||
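To make the bookkeeping concrete, here is a toy sketch using plain Python sets; the helper `split` and the literal values are purely illustrative, not part of the library API:

```python
# Toy illustration of the three pieces and which of them each operation keeps.
def split(a: set, b: set):
    return a - b, a & b, b - a  # just_A, intersection, just_B

just_A, intersection, just_B = split({1, 2, 3}, {3, 4, 5})

union                = just_A | intersection | just_B  # {1, 2, 3, 4, 5}
intersection_only    = intersection                    # {3}
a_minus_b            = just_A                          # {1, 2}
symmetric_difference = just_A | just_B                 # {1, 2, 4, 5}
```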
|
||||
The reason we have to keep just_A, intersection and just_B separate is that each will produce a node with different children:
|
||||
* just_B: the children of node_B
|
||||
* just_A: the children of node_A
|
||||
* intersection: the result of calling `operation(A, B)` recursively on two new nodes formed from A and B but with just the intersecting values.
|
||||
|
||||
This structure means that node.values can take different types, the two most useful being:
|
||||
* an enum, just a set of values
|
||||
* a range with start, stop and step
|
||||
|
||||
Qube.fused_set_operations can dispatch on the two types given in order to efficiently compute set/set, set/range and range/range intersection operations.
|
||||
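As a hedged illustration of one such mixed case (the function name below is an assumption, not the library's actual implementation), intersecting an enum of integer values with a contiguous range might look like:

```python
# Illustrative only: intersect a set of values with a contiguous range
# described by (start, stop, step), without materialising the range.
def set_range_intersection(values: set[int], start: int, stop: int, step: int = 1) -> set[int]:
    return {v for v in values if start <= v < stop and (v - start) % step == 0}

assert set_range_intersection({3, 7, 12, 20}, 0, 15) == {3, 7, 12}
```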
|
||||
### Performance considerations
|
||||
|
||||
This algorithm is quadratic in the number of matching keys: if we have a level with a huge number of nodes with key 'date' and range types (since range types are currently restricted to being contiguous) we could end up with a quadratic slowdown.
|
||||
|
||||
There are some ways this can be sped up:
|
||||
|
||||
* Once we know any of just_A, intersection or just_B is empty we can discard it. Only for quite pathological inputs (many sparse enums with a lot of overlap) would you actually get quadratically many non-empty terms.
|
||||
|
||||
* For ranges intersected with ranges, we could speed the algorithm up significantly by sorting the ranges and walking the two lists in tandem, which reduces it to linear in the number of ranges (see the sketch after this list).
|
||||
|
||||
* If we have N_A and N_B nodes to compare between the two trees we have N_A*N_B comparisons to do. However, at the end of the day we are just trying to determine, for each value, whether it is in A, B or both. If N_A*N_B >> M, the number of values, we might be able to switch to an alternative algorithm.
|
||||
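A minimal sketch of that two-pointer idea, assuming ranges are contiguous half-open `(start, stop)` pairs already sorted by start (illustrative only, not the library's internal representation):

```python
# Intersect two sorted lists of contiguous half-open ranges in linear time.
def intersect_sorted_ranges(a, b):
    out, i, j = [], 0, 0
    while i < len(a) and j < len(b):
        start = max(a[i][0], b[j][0])
        stop = min(a[i][1], b[j][1])
        if start < stop:
            out.append((start, stop))
        # Advance whichever range finishes first.
        if a[i][1] < b[j][1]:
            i += 1
        else:
            j += 1
    return out

assert intersect_sorted_ranges([(0, 5), (10, 20)], [(3, 12)]) == [(3, 5), (10, 12)]
```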
|
||||
|
||||
## Compression
|
||||
|
||||
In order to keep the tree compressed as operations are performed on it we define the "structural hash" of a node to be the hash of:
|
||||
* The node's key
|
||||
* Not the node's values.
|
||||
* The keys, values and children of the node's children, recursively.
|
||||
|
||||
This structural hash lets us identify when two sibling nodes may be able to be merged into one node thus keeping the tree compressed.
|
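A minimal sketch of such a structural hash, with assumed class and method names rather than the library's real ones:

```python
from dataclasses import dataclass

@dataclass(frozen=True)
class Node:
    key: str
    values: frozenset
    children: tuple = ()

    def full_hash(self) -> int:
        # Key, values and children, recursively.
        return hash((self.key, self.values, tuple(c.full_hash() for c in self.children)))

    def structural_hash(self) -> int:
        # Key and the children's full hashes, but *not* this node's own values.
        return hash((self.key, tuple(c.full_hash() for c in self.children)))

# Two siblings with equal structural hashes can be merged by unioning their values.
a = Node("expver", frozenset({"0001"}), (Node("param", frozenset({"1", "2"})),))
b = Node("expver", frozenset({"0002"}), (Node("param", frozenset({"1", "2"})),))
assert a.structural_hash() == b.structural_hash()
```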
5
docs/autobuild.sh
Executable file
@ -0,0 +1,5 @@
|
||||
# cd to current directory of script
|
||||
parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
|
||||
cd "$parent_path"
|
||||
|
||||
sphinx-autobuild . _build
|
87
docs/background.md
Normal file
@ -0,0 +1,87 @@
|
||||
---
|
||||
jupytext:
|
||||
text_representation:
|
||||
extension: .md
|
||||
format_name: myst
|
||||
format_version: 0.13
|
||||
jupytext_version: 1.16.4
|
||||
---
|
||||
# Datacubes, Trees and Compressed trees
|
||||
|
||||
This section contains a bit more of an introduction to the datastructure; feel free to skip ahead to the [Quickstart](quickstart.md). See the [datacube spec](https://github.com/ecmwf/datacube-spec) for even more detail and the canonical source of truth on the matter.
|
||||
|
||||
Qubed is primarily geared towards dealing with datafiles uniquely labeled by sets of key value pairs. We'll call a set of key value pairs that uniquely labels some data an `identifier`. Here's an example:
|
||||
|
||||
```python
|
||||
{
|
||||
'class': 'd1',
|
||||
'dataset': 'climate-dt',
|
||||
'generation': '1',
|
||||
'date': '20241102',
|
||||
'resolution': 'high',
|
||||
'time': '0000',
|
||||
}
|
||||
```
|
||||
|
||||
Unfortunately, we have more than one data file. If we are lucky, the set of identifiers that currently exist might form a dense datacube that we could represent like this:
|
||||
|
||||
```python
|
||||
{
|
||||
'class': ['d1', 'd2'],
|
||||
'dataset': 'climate-dt',
|
||||
'generation': ['1','2','3'],
|
||||
'model': 'icon',
|
||||
'date': ['20241102','20241103'],
|
||||
'resolution': ['high','low'],
|
||||
'time': ['0000', '0600', '1200', '1800'],
|
||||
}
|
||||
```
|
||||
|
||||
with the property that any particular choice of value for any key will correspond to a datafile that exists. So this object represents `2x1x3x1x2x2x4 = 96` different datafiles.
|
||||
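As a quick sanity check of that count (the dictionary below simply restates the cube above):

```python
from math import prod

cube = {
    "class": ["d1", "d2"],
    "dataset": ["climate-dt"],
    "generation": ["1", "2", "3"],
    "model": ["icon"],
    "date": ["20241102", "20241103"],
    "resolution": ["high", "low"],
    "time": ["0000", "0600", "1200", "1800"],
}
assert prod(len(v) for v in cube.values()) == 96  # 2*1*3*1*2*2*4
```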
|
||||
To save space I will also represent this same thing like this:
|
||||
```
|
||||
- class=d1/d2, dataset=climate-dt, generation=1/2/3, ..., time=0000/0600/1200/1800
|
||||
```
|
||||
|
||||
Unfortunately, we are not lucky and our datacubes are not always dense. In this case we might instead represent which data exists using a tree:
|
||||
|
||||
```{code-cell} python3
|
||||
from qubed import Qube
|
||||
|
||||
q = Qube.from_dict({
|
||||
"class=od" : {
|
||||
"expver=0001": {"param=1":{}, "param=2":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
},
|
||||
"class=rd" : {
|
||||
"expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
},
|
||||
})
|
||||
|
||||
# depth controls how much of the tree is open when rendered as html.
|
||||
q.html(depth=100)
|
||||
```
|
||||
|
||||
But it's clear that the above tree contains a lot of redundant information. Many of the subtrees are identical for example. Indeed in practice a lot of our data turns out to be 'nearly dense' in that it contains many dense datacubes within it.
|
||||
|
||||
There are many valid ways one could compress this tree. If we add the restriction that no identical key=value pairs can be adjacent then here is the compressed tree we might get:
|
||||
|
||||
```{code-cell} python3
|
||||
q.compress()
|
||||
```
|
||||
|
||||
```{warning}
|
||||
Without the above restriction we could, for example, have:
|
||||
|
||||
root
|
||||
├── class=od, expver=0001/0002, param=1/2
|
||||
└── class=rd
|
||||
├── expver=0001, param=3
|
||||
└── expver=0001/0002, param=1/2
|
||||
|
||||
but we do not allow this because it would mean we would have to take multiple branches in order to find data with `expver=0001`.
|
||||
```
|
||||
|
||||
What we have now is a tree of dense datacubes which represents a single larger sparse datacube in a more compact manner. For want of a better word we'll call it a Qube.
|
44
docs/cmd.md
Normal file
@ -0,0 +1,44 @@
|
||||
# Command Line Usage
|
||||
|
||||
```bash
|
||||
fdb list class=rd,expver=0001,... | qubed --from=fdblist --to=text
|
||||
|
||||
fdb list --minimum-keys=class class=d1,dataset=climate-dt --config prod_remoteFDB.yaml | qubed convert --from=fdb --to=text
|
||||
|
||||
```
|
||||
|
||||
`--from` options include:
|
||||
* `fdb`
|
||||
|
||||
`--to` options include:
|
||||
* `text`
|
||||
* `html`
|
||||
* `json`
|
||||
|
||||
Use `--input` and `--output` to specify input and output files, respectively.
|
||||
|
||||
|
||||
There's some handy test data in the `tests/data` directory. For example:
|
||||
```bash
|
||||
gzip -dc tests/data/fdb_list_compact.gz| qubed convert --from=fdb --to=text --output=qube.txt
|
||||
gzip -dc tests/data/fdb_list_porcelain.gz| qubed convert --from=fdb --to=json --output=qube.json
|
||||
gzip -dc tests/data/fdb_list_compact.gz | qubed convert --from=fdb --to=html --output=qube.html
|
||||
|
||||
# Operational data stream=oper/wave/enfo/waef
|
||||
fdb list class=od,expver=0001,date=0,stream=oper --compact >> operational_compact.txt
|
||||
cat operational_compact.txt | qubed convert --from=fdb --to=text --output=operational.txt
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Todo
|
||||
|
||||
--from for
|
||||
* `protobuf`
|
||||
* `marslist`
|
||||
* `constraints`
|
||||
|
||||
--to for
|
||||
* `json`
|
||||
* `datacubes`
|
||||
* `constraints`
|
39
docs/conf.py
Normal file
@ -0,0 +1,39 @@
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# For the full list of built-in configuration values, see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
||||
|
||||
project = "qubed"
|
||||
copyright = "2025, Tom Hodson (ECMWF)"
|
||||
author = "Tom Hodson (ECMWF)"
|
||||
release = "0.1.0"
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||
|
||||
extensions = [
|
||||
"sphinx.ext.autodoc", # for generating documentation from the docstrings in our code
|
||||
"sphinx.ext.napoleon", # for parsing Numpy and Google stye docstrings
|
||||
"myst_nb", # For parsing markdown
|
||||
]
|
||||
|
||||
templates_path = ["_templates"]
|
||||
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "jupyter_execute"]
|
||||
|
||||
|
||||
source_suffix = {
|
||||
".rst": "restructuredtext",
|
||||
}
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
|
||||
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_static_path = ["_static"]
|
||||
|
||||
myst_enable_extensions = [
|
||||
"attrs_inline",
|
||||
]
|
21
docs/development.md
Normal file
@ -0,0 +1,21 @@
|
||||
# Development
|
||||
|
||||
To install the latest stable release from PyPI (recommended):
|
||||
|
||||
```bash
|
||||
pip install qubed
|
||||
```
|
||||
|
||||
To install the latest version from github (requires rust):
|
||||
|
||||
```bash
|
||||
pip install qubed@git+https://github.com/ecmwf/qubed.git@main
|
||||
```
|
||||
|
||||
To build the develop branch from source, install a Rust toolchain and `pip install maturin`, then run:
|
||||
|
||||
```
|
||||
git clone -b develop git@github.com:ecmwf/qubed.git
|
||||
cd qubed
|
||||
maturin develop
|
||||
```
|
137
docs/fiab.md
Normal file
@ -0,0 +1,137 @@
|
||||
---
|
||||
jupytext:
|
||||
text_representation:
|
||||
extension: .md
|
||||
format_name: myst
|
||||
format_version: 0.13
|
||||
jupytext_version: 1.16.4
|
||||
---
|
||||
|
||||
# Fiab
|
||||
|
||||
## Model Selection
|
||||
|
||||
This is a demo of using qubed to select from a set of forecast models that each produce a set of output variables.
|
||||
|
||||
First let's construct some models represented as qubes:
|
||||
|
||||
```{code-cell} python3
|
||||
from qubed import Qube
|
||||
model_1 = Qube.from_datacube({
|
||||
"levtype": "pl",
|
||||
"param" : ["q", "t", "u", "v", "w", "z"],
|
||||
"level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
|
||||
}) | Qube.from_datacube({
|
||||
"levtype": "sfc",
|
||||
"param" : ["10u", "10v", "2d", "2t", "cp", "msl", "skt", "sp", "tcw", "tp"],
|
||||
})
|
||||
|
||||
model_1 = "model=1" / ("frequency=6h" / model_1)
|
||||
model_1
|
||||
```
|
||||
|
||||
This is the most complete model. Now let's do one with fewer variables and levels:
|
||||
|
||||
```{code-cell} python3
|
||||
model_2 = Qube.from_datacube({
|
||||
"levtype": "pl",
|
||||
"param" : ["q", "t"],
|
||||
"level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
|
||||
}) | Qube.from_datacube({
|
||||
"levtype": "sfc",
|
||||
"param" : ["2t", "cp", "msl"],
|
||||
})
|
||||
model_2 = "model=2" / ("frequency=continuous" / model_2)
|
||||
```
|
||||
|
||||
```{code-cell} python3
|
||||
model_3 = Qube.from_datacube({
|
||||
"levtype": "pl",
|
||||
"param" : ["q", "t"],
|
||||
"level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
|
||||
}) | Qube.from_datacube({
|
||||
"levtype": "sfc",
|
||||
"param" : ["2t", "cp", "msl"],
|
||||
})
|
||||
model_3 = "model=3" / ("frequency=6h" / model_3)
|
||||
model_3
|
||||
```
|
||||
|
||||
|
||||
Now we can combine the three models into a single qube:
|
||||
|
||||
```{code-cell} python3
|
||||
all_models = model_1 | model_2 | model_3
|
||||
all_models
|
||||
```
|
||||
|
||||
Now we can perform queries over the models. We can get all models that produce 2m temperature:
|
||||
```{code-cell} python3
|
||||
all_models.select({
|
||||
"param" : "2t",
|
||||
})
|
||||
```
|
||||
|
||||
Filter on both parameter and frequency:
|
||||
|
||||
```{code-cell} python3
|
||||
all_models.select({
|
||||
"param" : "2t",
|
||||
"frequency": "continuous",
|
||||
})
|
||||
```
|
||||
|
||||
Find all models that have some overlap with this set of parameters:
|
||||
|
||||
```{code-cell} python3
|
||||
all_models.select({
|
||||
"param" : ["q", "t", "u", "v"],
|
||||
})
|
||||
```
|
||||
|
||||
## Choosing a set of models based on the requested parameter set
|
||||
|
||||
```{code-cell} python3
|
||||
all_models.select({
|
||||
"param" : ["q", "t", "u", "v"],
|
||||
"frequency": "6h",
|
||||
})
|
||||
```
|
||||
|
||||
## Using Wildcards
|
||||
|
||||
```{code-cell} python3
|
||||
daily_surface_means = Qube.from_datacube({
|
||||
"model": "*",
|
||||
"frequency": "*",
|
||||
"levtype": "sfc",
|
||||
"param": "*",
|
||||
})
|
||||
all_models & daily_surface_means
|
||||
```
|
||||
|
||||
```{code-cell} python3
|
||||
|
||||
daily_level_means = Qube.from_datacube({
|
||||
"model": "*",
|
||||
"frequency": "*",
|
||||
"levtype": "pl",
|
||||
"param": "*",
|
||||
"level": "*"
|
||||
})
|
||||
all_models & daily_level_means
|
||||
```
|
||||
|
||||
```{code-cell} python3
|
||||
daily_level_mean_products = all_models & daily_surface_means
|
||||
for i, identifier in enumerate(daily_level_mean_products.leaves()):
|
||||
print(identifier)
|
||||
if i > 10:
|
||||
print("...")
|
||||
break
|
||||
|
||||
```
|
||||
|
||||
<!-- ## Choosing the fewest models needed to cover the requested parameter set -->
|
||||
|
||||
<!-- ```{code-cell} python3 -->
|
53
docs/index.md
Normal file
@ -0,0 +1,53 @@
|
||||
---
|
||||
jupytext:
|
||||
text_representation:
|
||||
extension: .md
|
||||
format_name: myst
|
||||
format_version: 0.13
|
||||
jupytext_version: 1.16.4
|
||||
---
|
||||
|
||||
# Qubed
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 1
|
||||
quickstart.md
|
||||
development.md
|
||||
background.md
|
||||
algorithms.md
|
||||
fiab.md
|
||||
cmd.md
|
||||
```
|
||||
|
||||
Qubed provides a data structure primitive for working with trees of datacubes. If a normal tree looks like this:
|
||||
```
|
||||
root
|
||||
├── class=od
|
||||
│ ├── expver=0001
|
||||
│ │ ├── param=1
|
||||
│ │ └── param=2
|
||||
│ └── expver=0002
|
||||
│ ├── param=1
|
||||
│ └── param=2
|
||||
└── class=rd
|
||||
├── expver=0001
|
||||
│ ├── param=1
|
||||
│ ├── param=2
|
||||
│ └── param=3
|
||||
└── expver=0002
|
||||
├── param=1
|
||||
└── param=2
|
||||
```
|
||||
|
||||
A compressed view of the same set would be:
|
||||
```
|
||||
root
|
||||
├── class=od, expver=0001/0002, param=1/2
|
||||
└── class=rd
|
||||
├── expver=0001, param=1/2/3
|
||||
└── expver=0002, param=1/2
|
||||
```
|
||||
|
||||
Qubed provides a data structure that represents this compressed datacube tree, which we call a Qube. It defines all the algorithms you would expect, such as intersection/union/difference, compression, search, transformation and filtering.
|
||||
|
||||
To get a little more background on the motivation and structure of a Qube, go to [Background](background.md); for a more hands-on intro, go to [Quickstart](quickstart.md).
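As a minimal sketch (assuming `qubed` is installed), the compressed view above can be reproduced by taking the union of three dense datacubes; the exact printed layout may differ slightly:

```python
from qubed import Qube

# Union of three dense datacubes; printing shows the compressed tree.
q = (
    Qube.from_datacube({"class": "od", "expver": ["0001", "0002"], "param": ["1", "2"]})
    | Qube.from_datacube({"class": "rd", "expver": "0001", "param": ["1", "2", "3"]})
    | Qube.from_datacube({"class": "rd", "expver": "0002", "param": ["1", "2"]})
)
print(q)
```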
|
35
docs/make.bat
Normal file
@ -0,0 +1,35 @@
|
||||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=.
|
||||
set BUILDDIR=_build
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.https://www.sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
273
docs/quickstart.md
Normal file
@ -0,0 +1,273 @@
|
||||
---
|
||||
jupytext:
|
||||
text_representation:
|
||||
extension: .md
|
||||
format_name: myst
|
||||
format_version: 0.13
|
||||
jupytext_version: 1.16.4
|
||||
---
|
||||
# Quickstart
|
||||
|
||||
First install qubed with `pip install qubed`. Now, let's dive in with a real-world dataset from the [Climate DT](https://destine.ecmwf.int/climate-change-adaptation-digital-twin-climate-dt/). We'll pull a prebuilt qube from GitHub and render it in its default HTML representation.
|
||||
|
||||
```{code-cell} python3
|
||||
import requests
|
||||
from qubed import Qube
|
||||
climate_dt = Qube.from_json(requests.get("https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json())
|
||||
climate_dt.html(depth=1)
|
||||
```
|
||||
|
||||
Click the arrows to expand and drill down deeper into the data.
|
||||
|
||||
```{note}
|
||||
There is currently a simple Qube web browser hosted [here](https://qubed.lumi.apps.dte.destination-earth.eu/). Browse that and copy the 'Example Qube Code' to download a Qube representing the selection at that point. You'll get something like `Qube.from_json(requests.get("https://qubed.lumi.apps.dte.destination-earth.eu/api/v1/select/climate-dt/?").json())`{l=python}
|
||||
```
|
||||
|
||||
Fundamentally, a Qube represents a set of identifiers, each of which is a set of key-value pairs. Here's one leaf from the Climate DT dataset:
|
||||
|
||||
```{code-cell} python3
|
||||
next(climate_dt.leaves())
|
||||
```
|
||||
|
||||
We can look at the set of values each key can take:
|
||||
```{code-cell} python3
|
||||
axes = climate_dt.axes()
|
||||
for key, values in axes.items():
|
||||
print(f"{key} : {list(sorted(values))[:10]}")
|
||||
```
|
||||
|
||||
This dataset isn't dense: you can't choose an arbitrary combination of the above key-value pairs. It does, however, contain many dense datacubes, so it makes sense to store and process the set as a tree of dense datacubes, which is exactly what a Qube is. For a sense of scale, this dataset contains about 8 million distinct datasets but only a few hundred unique nodes.
|
||||
|
||||
```{code-cell} python3
|
||||
import objsize
|
||||
print(f"""
|
||||
Distinct datasets: {climate_dt.n_leaves}
|
||||
Number of nodes in the tree: {climate_dt.n_nodes}
|
||||
Number of dense datacubes within this qube: {len(list(climate_dt.datacubes()))}
|
||||
In memory size according to objsize: {objsize.get_deep_size(climate_dt) / 2**20:.0f} MB
|
||||
""")
|
||||
```
|
||||
|
||||
## Building your own Qubes
|
||||
|
||||
You can build one from nested dictionaries with keys in the form "key=value":
|
||||
|
||||
```{code-cell} python3
|
||||
from qubed import Qube
|
||||
|
||||
q1 = Qube.from_dict({
|
||||
"class=od" : {
|
||||
"expver=0001": {"param=1":{}, "param=2":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
},
|
||||
"class=rd" : {
|
||||
"expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
},
|
||||
})
|
||||
print(f"{q1.n_leaves = }, {q1.n_nodes = }")
|
||||
q1
|
||||
```
|
||||
|
||||
If someone sends you a printed qube you can convert that back to a Qube too:
|
||||
|
||||
```{code-cell} python3
|
||||
q2 = Qube.from_tree("""
|
||||
root, frequency=6:00:00
|
||||
├── levtype=pl, param=t, levelist=850, threshold=-2/-4/-8/2/4/8
|
||||
└── levtype=sfc
|
||||
├── param=10u/10v, threshold=10/15
|
||||
├── param=2t, threshold=273.15
|
||||
└── param=tp, threshold=0.1/1/10/100/20/25/5/50
|
||||
""")
|
||||
q2
|
||||
```
|
||||
We would not recommend trying to write this representation by hand though.
|
||||
|
||||
Finally, quite a flexible approach is to take the union of a series of dense datacubes:
|
||||
|
||||
```{code-cell} python3
|
||||
q3 = Qube.from_datacube(
|
||||
dict(
|
||||
param="10u/10v/2d/2t/cp/msl/skt/sp/tcw/tp".split("/"),
|
||||
threshold="*",
|
||||
levtype="sfc",
|
||||
frequency="6:00:00",
|
||||
)
|
||||
) | Qube.from_datacube(
|
||||
dict(
|
||||
param="q/t/u/v/w/z".split("/"),
|
||||
threshold="*",
|
||||
levtype="pl",
|
||||
level="50/100/150/200/250/300/400/500/600/700/850".split("/"),
|
||||
frequency="6:00:00",
|
||||
)
|
||||
)
|
||||
q3
|
||||
```
|
||||
|
||||
## Operations on Qubes
|
||||
|
||||
Going back to that first qube:
|
||||
```{code-cell} python3
|
||||
q1
|
||||
```
|
||||
|
||||
We can compress it:
|
||||
|
||||
```{code-cell} python3
|
||||
cq = q1.compress()
|
||||
assert cq.n_leaves == q1.n_leaves
|
||||
print(f"{cq.n_leaves = }, {cq.n_nodes = }")
|
||||
cq
|
||||
```
|
||||
|
||||
With the HTML representation you can click on the leaves to expand them. You can copy a path representation of a node to the clipboard by alt/option/⌥ clicking on it. You can then extract that node in code using `[]`:
|
||||
|
||||
```{code-cell} python3
|
||||
cq["class=rd,expver=0001"]
|
||||
```
|
||||
|
||||
Select a subtree:
|
||||
|
||||
```{code-cell} python3
|
||||
cq["class", "od"]["expver", "0001"]
|
||||
```
|
||||
|
||||
Intersect with a dense datacube:
|
||||
|
||||
```{code-cell} python3
|
||||
dq = Qube.from_datacube({
|
||||
"class": ["od", "rd", "cd"],
|
||||
"expver": ["0001", "0002", "0003"],
|
||||
"param": "2",
|
||||
})
|
||||
|
||||
(cq & dq).print()
|
||||
```
|
||||
|
||||
|
||||
## Iteration
|
||||
|
||||
Iterate over the leaves:
|
||||
|
||||
```{code-cell} python3
|
||||
for i, identifier in enumerate(cq.leaves()):
|
||||
print(identifier)
|
||||
if i > 10:
|
||||
print("...")
|
||||
break
|
||||
```
|
||||
|
||||
Or, if you can, it's more efficient to iterate over the datacubes:
|
||||
|
||||
```{code-cell} python3
|
||||
list(cq.datacubes())
|
||||
```
|
||||
|
||||
## Selection
|
||||
Select a subset of the tree:
|
||||
|
||||
```{code-cell} python3
|
||||
climate_dt.select({
|
||||
"activity": "scenariomip"
|
||||
}).html(depth=1)
|
||||
```
|
||||
|
||||
Use `.span("key")` to get the set of possible values for a key; note this includes anywhere the key appears in the tree.
|
||||
|
||||
```{code-cell} python3
|
||||
climate_dt.span("activity")
|
||||
```
|
||||
|
||||
Use `.axes()` to get the span of every key in one go.
|
||||
|
||||
```{code-cell} python3
|
||||
axes = climate_dt.axes()
|
||||
for key, values in axes.items():
|
||||
print(f"{key} : {list(values)[:10]}")
|
||||
```
|
||||
|
||||
|
||||
## Set Operations
|
||||
|
||||
The union/intersection/difference of two dense datacubes is not itself dense.
|
||||
|
||||
```{code-cell} python3
|
||||
A = Qube.from_dict({"a=1/2/3" : {"b=i/j/k" : {}},})
|
||||
B = Qube.from_dict({"a=2/3/4" : {"b=j/k/l" : {}},})
|
||||
|
||||
A.print(), B.print();
|
||||
```
|
||||
|
||||
Union:
|
||||
|
||||
```{code-cell} python3
|
||||
(A | B).print();
|
||||
```
|
||||
|
||||
Intersection:
|
||||
|
||||
```{code-cell} python3
|
||||
(A & B).print();
|
||||
```
|
||||
|
||||
Difference:
|
||||
|
||||
```{code-cell} python3
|
||||
(A - B).print();
|
||||
```
|
||||
|
||||
Symmetric Difference:
|
||||
|
||||
```{code-cell} python3
|
||||
(A ^ B).print();
|
||||
```
|
||||
|
||||
## Transformations
|
||||
|
||||
`q.transform` takes a Python function from one node to one or more nodes and uses this to build a new tree. This can be used for simple operations on the key or values but also to split or remove nodes. Note that you can't use it to merge nodes because it's only allowed to see one node at a time.
|
||||
|
||||
```{code-cell} python3
|
||||
def capitalize(node): return node.replace(key = node.key.capitalize())
|
||||
climate_dt.transform(capitalize).html(depth=1)
|
||||
```
|
||||
|
||||
## Save to disk
|
||||
|
||||
There is currently a very simple JSON serialisation format. More compact binary serialisations are planned.
|
||||
```{code-cell} python3
|
||||
json = climate_dt.to_json()
|
||||
Qube.from_json(json) == climate_dt
|
||||
```
|
||||
|
||||
## Advanced Selection
|
||||
|
||||
There is currently partial support for datatypes other than strings. Here we convert the values of selected keys to ints and timedeltas and then use functions as filters in `select`.
|
||||
|
||||
```{code-cell} python3
|
||||
from datetime import timedelta, datetime
|
||||
def to_timedelta(t):
|
||||
dt = datetime.strptime(t, "%H:%M:%S")
|
||||
return timedelta(hours=dt.hour, minutes=dt.minute, seconds=dt.second)
|
||||
|
||||
q = Qube.from_tree("""
|
||||
root, frequency=6:00:00
|
||||
├── levtype=pl, levelist=850, threshold=-2/-4/-8/2/4/8
|
||||
└── levtype=sfc
|
||||
├── param=10u/10v, threshold=10/15
|
||||
├── param=2t, threshold=273.15
|
||||
└── param=tp, threshold=0.1/1/10/100/20/25/5/50
|
||||
""").convert_dtypes({
|
||||
"threshold": float,
|
||||
"levelist": int,
|
||||
"frequency": to_timedelta,
|
||||
})
|
||||
|
||||
r = q.select({
|
||||
"threshold": lambda t: t > 5,
|
||||
"frequency": lambda dt: dt > timedelta(hours = 2),
|
||||
})
|
||||
|
||||
r
|
||||
```
|
3
docs/requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
numpy
|
||||
scipy
|
||||
objsize
|
1
fiab/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
!*.json
|
37
fiab/example_products.md
Normal file
@ -0,0 +1,37 @@
|
||||
|
||||
|
||||
|
||||
Simplest possible product
|
||||
- one field: 2 metre temperature
|
||||
- all models that output param=2t would work
|
||||
- may also have a lead time range specified from
|
||||
|
||||
So we could say "here are all the models with param=2t with lead times in the specified interval"
|
||||
|
||||
quantiles
|
||||
param:
|
||||
float range from 0 - 100
|
||||
|
||||
threshold:
|
||||
"give me 2 metre temperature values that are above this threshold"
|
||||
|
||||
|
||||
product requirements can be specified as a set of (see the sketch after this list):
|
||||
params: one or more params
|
||||
levels: one or more or all
|
||||
time:
|
||||
- product could be specific to a particular time
|
||||
- could require at least a month's worth of data
|
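A hedged sketch of expressing such a requirement as a qubed selection, assuming the `all_models` qube built in `docs/fiab.md` (keys and values are illustrative):

```python
# Illustrative product requirement: 2m temperature, surface level, 6-hourly.
requirement = {
    "param": ["2t"],      # one or more params
    "levtype": "sfc",     # levels: one, several, or all
    "frequency": "6h",    # time constraint (simplified)
}
matching_models = all_models.select(requirement)
```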
||||
|
||||
|
||||
make some fake models that have:
|
||||
- fewer params
|
||||
- continuous times vs steps of 6 hours
|
||||
-
|
||||
|
||||
|
||||
Could also represent what data is currently cached on disk, and then be able to tell the user what they can generate really fast.
|
||||
|
||||
API want:
|
||||
- way to get axis span like what params exist
|
||||
-
|
48
fiab/extract.py
Normal file
@ -0,0 +1,48 @@
|
||||
import json
|
||||
from collections import defaultdict
|
||||
|
||||
from qubed import Qube
|
||||
|
||||
metadata = json.load(open("raw_anemoi_metadata.json"))
|
||||
|
||||
predicted_indices = [
|
||||
*metadata["data_indices"]["data"]["output"]["prognostic"],
|
||||
*metadata["data_indices"]["data"]["output"]["diagnostic"],
|
||||
]
|
||||
variables = metadata["dataset"]["variables"]
|
||||
variables = [variables[i] for i in predicted_indices]
|
||||
|
||||
# print('Raw Model Variables:', variables)
|
||||
|
||||
# Split variables between pressure and surface
|
||||
surface_variables = [v for v in variables if "_" not in v]
|
||||
|
||||
# Collect the levels for each pressure variable
|
||||
level_variables = defaultdict(list)
|
||||
for v in variables:
|
||||
if "_" in v:
|
||||
variable, level = v.split("_")
|
||||
level_variables[variable].append(int(level))
|
||||
|
||||
# print(level_variables)
|
||||
|
||||
model_tree = Qube.empty()
|
||||
|
||||
for variable, levels in level_variables.items():
|
||||
model_tree = model_tree | Qube.from_datacube(
|
||||
{
|
||||
"levtype": "pl",
|
||||
"param": variable,
|
||||
"level": levels,
|
||||
}
|
||||
)
|
||||
|
||||
for variable in surface_variables:
|
||||
model_tree = model_tree | Qube.from_datacube(
|
||||
{
|
||||
"levtype": "sfc",
|
||||
"param": variable,
|
||||
}
|
||||
)
|
||||
|
||||
print(model_tree.to_json())
|
1
fiab/raw_anemoi_metadata.json
Normal file
File diff suppressed because one or more lines are too long
67
fiab/structure.yaml
Normal file
@ -0,0 +1,67 @@
|
||||
# Format: list of models, each model has a model_outputs field which contains a nested tree of nodes
|
||||
# Nodes have {node: name, cube: list of key value(s) pairs, children: list[nodes]}
|
||||
|
||||
- model: surface_and_atmosphere_model
|
||||
model_outputs:
|
||||
- node: root
|
||||
cube:
|
||||
class: rd
|
||||
stream: anemoi
|
||||
expver: something
|
||||
lead_time:
|
||||
type: datetime
|
||||
format: '%Y-%m-%d %H:%M:%S'
|
||||
step: 6h
|
||||
|
||||
|
||||
children:
|
||||
- node: pressure_variables
|
||||
other_metadata: something
|
||||
cube:
|
||||
param: ['q', 't', 'u', 'v', 'w', 'z']
|
||||
level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]
|
||||
|
||||
|
||||
- node: surface_variables
|
||||
other_metadata: something
|
||||
cube:
|
||||
param: ['sp', 'msl', '10u', '10v', '2t', '2d', 'skt', 'tcw', 'cp', 'tp']
|
||||
|
||||
# Hypothetical Ocean variables
|
||||
- node: ocean_variables
|
||||
cube:
|
||||
param: ["saltiness", "number of eels", "is_blue", "good_for_surfing"]
|
||||
ocean_levels: [??, ??]
|
||||
|
||||
# Alternative List of cubes format
|
||||
- model: surface_and_atmosphere_model
|
||||
model_outputs:
|
||||
- node: root
|
||||
cube:
|
||||
class: rd
|
||||
stream: anemoi
|
||||
expver: something
|
||||
lead_time:
|
||||
type: datetime
|
||||
format: '%Y-%m-%d %H:%M:%S'
|
||||
step: 6h
|
||||
|
||||
|
||||
children:
|
||||
- node: pressure_variables
|
||||
other_metadata: something
|
||||
cube:
|
||||
param: ['q', 't', 'u', 'v', 'w', 'z']
|
||||
level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]
|
||||
|
||||
|
||||
- node: surface_variables
|
||||
other_metadata: something
|
||||
cube:
|
||||
param: ['sp', 'msl', '10u', '10v', '2t', '2d', 'skt', 'tcw', 'cp', 'tp']
|
||||
|
||||
# Hypothetical Ocean variables
|
||||
- node: ocean_variables
|
||||
cube:
|
||||
param: ["saltiness", "number of eels", "is_blue", "good_for_surfing"]
|
||||
ocean_levels: [??, ??]
|
67
pyproject.toml
Normal file
@ -0,0 +1,67 @@
|
||||
[build-system]
|
||||
requires = ["maturin>=1.0,<2.0"]
|
||||
build-backend = "maturin"
|
||||
|
||||
[project]
|
||||
name = "qubed"
|
||||
description = "A library that provides a tree of datacubes called Qube."
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
{name = "Tom Hodson", email = "thomas.hodson@ecmwf.int"},
|
||||
]
|
||||
classifiers = [
|
||||
"Programming Language :: Rust",
|
||||
"Programming Language :: Python :: Implementation :: CPython",
|
||||
"Programming Language :: Python :: Implementation :: PyPy",
|
||||
]
|
||||
requires-python = ">= 3.11"
|
||||
dynamic = ["version"]
|
||||
dependencies = [
|
||||
"frozendict",
|
||||
"numpy",
|
||||
"protobuf",
|
||||
|
||||
# CLI
|
||||
"rich",
|
||||
"click",
|
||||
"psutil"
|
||||
]
|
||||
|
||||
# Because this is a mixed rust/python project the structure is src/python/qubed rather than the more typical src/qubed
|
||||
# Since this is slightly non-standard, we have to explicitly tell setuptools where the python source is.
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["src/python"]
|
||||
|
||||
[project.scripts]
|
||||
qubed = "qubed.__main__:main"
|
||||
|
||||
[tool.maturin]
|
||||
python-source = "src/python"
|
||||
module-name = "qubed.rust"
|
||||
features = ["pyo3/extension-module"]
|
||||
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
stac_server = [
|
||||
"fastapi",
|
||||
]
|
||||
|
||||
docs = [
|
||||
"sphinx",
|
||||
"sphinx-rtd-theme",
|
||||
"myst_nb",
|
||||
"sphinx-autobuild",
|
||||
"jupyterlab",
|
||||
"ipykernel",
|
||||
]
|
||||
|
||||
dev = [
|
||||
"pytest",
|
||||
"black",
|
||||
"ruff",
|
||||
"flake8",
|
||||
"pre-commit",
|
||||
"isort",
|
||||
|
||||
]
|
7
run.sh
@ -1,2 +1,7 @@
|
||||
cd backend
|
||||
fastapi dev main.py
|
||||
# ../.venv/bin/fastapi dev main.py
|
||||
../.venv/bin/uvicorn main:app --reload \
|
||||
--reload-include="*.html" \
|
||||
--reload-include="*.css" \
|
||||
--reload-include="*.js" \
|
||||
--reload-include="*.yaml"
|
||||
|
7
run_prod.sh
Executable file
@ -0,0 +1,7 @@
|
||||
cd backend
|
||||
# sudo ../.venv/bin/fastapi dev main.py --port 80
|
||||
sudo ../.venv/bin/uvicorn main:app --port 80 --host 0.0.0.0 --reload\
|
||||
--reload-include="*.html" \
|
||||
--reload-include="*.css" \
|
||||
--reload-include="*.js" \
|
||||
--reload-include="*.yaml"
|
9
scripts/build_images.sh
Executable file
@ -0,0 +1,9 @@
|
||||
set -e
|
||||
|
||||
sudo docker login eccr.ecmwf.int
|
||||
|
||||
sudo docker build \
|
||||
--tag=eccr.ecmwf.int/qubed/stac_server:latest \
|
||||
--target=stac_server \
|
||||
.
|
||||
sudo docker push eccr.ecmwf.int/qubed/stac_server:latest
|
2
scripts/deploy.sh
Executable file
@ -0,0 +1,2 @@
|
||||
# helm install qubed chart -n qubed
|
||||
helm upgrade qubed chart -n qubed
|
1
scripts/logs.sh
Normal file
@ -0,0 +1 @@
|
||||
kubectl -n qubed logs deployment/stac-server
|
2
scripts/restart.sh
Executable file
@ -0,0 +1,2 @@
|
||||
# kubectl rollout restart deployment/redis
|
||||
kubectl -n qubed rollout restart deployment/stac-server
|
692
src/python/qubed/Qube.py
Normal file
@ -0,0 +1,692 @@
|
||||
# This causes python types to be evaluated later,
|
||||
# allowing you to reference types like Qube inside the definition of the Qube class
|
||||
# without having to do "Qube"
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import functools
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass, field
|
||||
from functools import cached_property
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, Iterator, Literal, Mapping, Self, Sequence
|
||||
|
||||
import numpy as np
|
||||
from frozendict import frozendict
|
||||
|
||||
from . import set_operations
|
||||
from .metadata import from_nodes
|
||||
from .protobuf.adapters import proto_to_qube, qube_to_proto
|
||||
from .tree_formatters import (
|
||||
HTML,
|
||||
_display,
|
||||
node_tree_to_html,
|
||||
node_tree_to_string,
|
||||
)
|
||||
from .value_types import (
|
||||
QEnum,
|
||||
ValueGroup,
|
||||
WildcardGroup,
|
||||
values_from_json,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class AxisInfo:
|
||||
key: str
|
||||
type: Any
|
||||
depths: set[int]
|
||||
values: set
|
||||
|
||||
def combine(self, other: Self):
|
||||
self.key = other.key
|
||||
self.type = other.type
|
||||
self.depths.update(other.depths)
|
||||
self.values.update(other.values)
|
||||
# print(f"combining {self} and {other} getting {result}")
|
||||
|
||||
def to_json(self):
|
||||
return {
|
||||
"key": self.key,
|
||||
"type": self.type.__name__,
|
||||
"values": list(self.values),
|
||||
"depths": list(self.depths),
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True, eq=True, order=True, unsafe_hash=True)
|
||||
class QubeNamedRoot:
|
||||
"Helper class to print a custom root name"
|
||||
|
||||
key: str
|
||||
children: tuple[Qube, ...] = ()
|
||||
|
||||
def summary(self) -> str:
|
||||
return self.key
|
||||
|
||||
|
||||
@dataclass(frozen=False, eq=True, order=True, unsafe_hash=True)
|
||||
class Qube:
|
||||
key: str
|
||||
values: ValueGroup
|
||||
metadata: frozendict[str, np.ndarray] = field(
|
||||
default_factory=lambda: frozendict({}), compare=False
|
||||
)
|
||||
children: tuple[Qube, ...] = ()
|
||||
is_root: bool = False
|
||||
is_leaf: bool = False
|
||||
depth: int = field(default=0, compare=False)
|
||||
shape: tuple[int, ...] = field(default=(), compare=False)
|
||||
|
||||
@classmethod
|
||||
def make_node(
|
||||
cls,
|
||||
key: str,
|
||||
values: Iterable | QEnum | WildcardGroup,
|
||||
children: Iterable[Qube],
|
||||
metadata: Mapping[str, np.ndarray] = {},
|
||||
is_root: bool = False,
|
||||
is_leaf: bool | None = None,
|
||||
) -> Qube:
|
||||
if isinstance(values, ValueGroup):
|
||||
values = values
|
||||
else:
|
||||
values = QEnum(values)
|
||||
|
||||
if not isinstance(values, WildcardGroup) and not is_root:
|
||||
assert len(values) > 0, "Nodes must have at least one value"
|
||||
|
||||
children = tuple(sorted(children, key=lambda n: ((n.key, n.values.min()))))
|
||||
|
||||
return cls(
|
||||
key,
|
||||
values=values,
|
||||
children=children,
|
||||
metadata=frozendict(metadata),
|
||||
is_root=is_root,
|
||||
is_leaf=(not len(children)) if is_leaf is None else is_leaf,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def make_root(cls, children: Iterable[Qube], metadata={}) -> Qube:
|
||||
def update_depth_shape(children, depth, shape):
|
||||
for child in children:
|
||||
child.depth = depth + 1
|
||||
child.shape = shape + (len(child.values),)
|
||||
update_depth_shape(child.children, child.depth, child.shape)
|
||||
|
||||
update_depth_shape(children, depth=0, shape=(1,))
|
||||
|
||||
return cls.make_node(
|
||||
"root",
|
||||
values=QEnum(("root",)),
|
||||
children=children,
|
||||
metadata=metadata,
|
||||
is_root=True,
|
||||
)
|
||||
|
||||
def replace(self, **kwargs) -> Qube:
|
||||
return dataclasses.replace(self, **kwargs)
|
||||
|
||||
def summary(self) -> str:
|
||||
if self.is_root:
|
||||
return self.key
|
||||
return f"{self.key}={self.values.summary()}" if self.key != "root" else "root"
|
||||
|
||||
@classmethod
|
||||
def load(cls, path: str | Path) -> Qube:
|
||||
with open(path, "r") as f:
|
||||
return Qube.from_json(json.load(f))
|
||||
|
||||
@classmethod
|
||||
def from_datacube(cls, datacube: Mapping[str, str | Sequence[str]]) -> Qube:
|
||||
key_vals = list(datacube.items())[::-1]
|
||||
|
||||
children: list[Qube] = []
|
||||
for key, values in key_vals:
|
||||
values_group: ValueGroup
|
||||
if values == "*":
|
||||
values_group = WildcardGroup()
|
||||
elif isinstance(values, list):
|
||||
values_group = QEnum(values)
|
||||
else:
|
||||
values_group = QEnum([values])
|
||||
|
||||
children = [cls.make_node(key, values_group, children)]
|
||||
|
||||
return cls.make_root(children)
|
||||
|
||||
@classmethod
|
||||
def from_json(cls, json: dict) -> Qube:
|
||||
def from_json(json: dict, depth=0) -> Qube:
|
||||
return Qube.make_node(
|
||||
key=json["key"],
|
||||
values=values_from_json(json["values"]),
|
||||
metadata=frozendict(json["metadata"]) if "metadata" in json else {},
|
||||
children=(from_json(c, depth + 1) for c in json["children"]),
|
||||
is_root=(depth == 0),
|
||||
)
|
||||
|
||||
return from_json(json)
|
||||
|
||||
@classmethod
|
||||
def from_nodes(cls, nodes: dict[str, dict], add_root: bool = True):
|
||||
return from_nodes(cls, nodes, add_root)
|
||||
|
||||
def to_json(self) -> dict:
|
||||
def to_json(node: Qube) -> dict:
|
||||
return {
|
||||
"key": node.key,
|
||||
"values": node.values.to_json(),
|
||||
"metadata": dict(node.metadata),
|
||||
"children": [to_json(c) for c in node.children],
|
||||
}
|
||||
|
||||
return to_json(self)
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, d: dict) -> Qube:
|
||||
def from_dict(d: dict) -> Iterator[Qube]:
|
||||
for k, children in d.items():
|
||||
key, values = k.split("=")
|
||||
values = values.split("/")
|
||||
# children == {"..." : {}}
|
||||
# is a special case to represent trees with leaves we don't know about
|
||||
if frozendict(children) == frozendict({"...": {}}):
|
||||
yield Qube.make_node(
|
||||
key=key,
|
||||
values=values,
|
||||
children={},
|
||||
is_leaf=False,
|
||||
)
|
||||
|
||||
# Special case for Wildcard values
|
||||
if values == ["*"]:
|
||||
values = WildcardGroup()
|
||||
else:
|
||||
values = QEnum(values)
|
||||
|
||||
yield Qube.make_node(
|
||||
key=key,
|
||||
values=values,
|
||||
children=from_dict(children),
|
||||
)
|
||||
|
||||
return Qube.make_root(list(from_dict(d)))
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
def to_dict(q: Qube) -> tuple[str, dict]:
|
||||
key = f"{q.key}={','.join(str(v) for v in q.values)}"
|
||||
return key, dict(to_dict(c) for c in q.children)
|
||||
|
||||
return to_dict(self)[1]
|
||||
|
||||
@classmethod
|
||||
def from_protobuf(cls, msg: bytes) -> Qube:
|
||||
return proto_to_qube(cls, msg)
|
||||
|
||||
def to_protobuf(self) -> bytes:
|
||||
return qube_to_proto(self)
|
||||
|
||||
@classmethod
|
||||
def from_tree(cls, tree_str):
|
||||
lines = tree_str.splitlines()
|
||||
stack = []
|
||||
root = {}
|
||||
|
||||
initial_indent = None
|
||||
for line in lines:
|
||||
if not line.strip():
|
||||
continue
|
||||
# Remove tree characters and measure indent level
|
||||
stripped = line.lstrip(" │├└─")
|
||||
indent = (len(line) - len(stripped)) // 4
|
||||
if initial_indent is None:
|
||||
initial_indent = indent
|
||||
indent = indent - initial_indent
|
||||
|
||||
# Split multiple key=value parts into nested structure
|
||||
keys = [item.strip() for item in stripped.split(",")]
|
||||
current = bottom = {}
|
||||
for key in reversed(keys):
|
||||
current = {key: current}
|
||||
|
||||
# Adjust the stack to current indent level
|
||||
# print(len(stack), stack)
|
||||
while len(stack) > indent:
|
||||
stack.pop()
|
||||
|
||||
if stack:
|
||||
# Add to the dictionary at current stack level
|
||||
parent = stack[-1]
|
||||
key = list(current.keys())[0]
|
||||
if key in parent:
|
||||
raise ValueError(
|
||||
f"This function doesn't yet support reading in uncompressed trees, repeated key is {key}"
|
||||
)
|
||||
parent[key] = current[key]
|
||||
else:
|
||||
# Top level
|
||||
key = list(current.keys())[0]
|
||||
if root:
|
||||
raise ValueError(
|
||||
f"This function doesn't yet support reading in uncompressed trees, repeated key is {key}"
|
||||
)
|
||||
root = current[key]
|
||||
|
||||
# Push to the stack
|
||||
stack.append(bottom)
|
||||
|
||||
return cls.from_dict(root)
|
||||
|
||||
@classmethod
|
||||
def empty(cls) -> Qube:
|
||||
return Qube.make_root([])
|
||||
|
||||
def __str_helper__(self, depth=None, name=None) -> str:
|
||||
node = self
|
||||
if name is not None:
|
||||
node = node.replace(key=name)
|
||||
out = "".join(node_tree_to_string(node=node, depth=depth))
|
||||
if out[-1] == "\n":
|
||||
out = out[:-1]
|
||||
return out
|
||||
|
||||
def __str__(self):
|
||||
return self.__str_helper__()
|
||||
|
||||
def __repr__(self):
|
||||
return f"Qube({self.__str_helper__()})"
|
||||
|
||||
def print(self, depth=None, name: str | None = None):
|
||||
print(self.__str_helper__(depth=depth, name=name))
|
||||
|
||||
def html(
|
||||
self,
|
||||
depth=2,
|
||||
collapse=True,
|
||||
name: str | None = None,
|
||||
info: Callable[[Qube], str] | None = None,
|
||||
) -> HTML:
|
||||
node = self
|
||||
if name is not None:
|
||||
node = node.replace(key=name)
|
||||
return HTML(
|
||||
node_tree_to_html(node=node, depth=depth, collapse=collapse, info=info)
|
||||
)
|
||||
|
||||
def _repr_html_(self) -> str:
|
||||
return node_tree_to_html(self, depth=2, collapse=True)
|
||||
|
||||
# Allow "key=value/value" / qube to prepend keys
|
||||
def __rtruediv__(self, other: str) -> Qube:
|
||||
key, values = other.split("=")
|
||||
values_enum = QEnum((values.split("/")))
|
||||
return Qube.make_root([Qube.make_node(key, values_enum, self.children)])
|
||||
|
||||
def __or__(self, other: Qube) -> Qube:
|
||||
return set_operations.operation(
|
||||
self, other, set_operations.SetOperation.UNION, type(self)
|
||||
)
|
||||
|
||||
def __and__(self, other: Qube) -> Qube:
|
||||
return set_operations.operation(
|
||||
self, other, set_operations.SetOperation.INTERSECTION, type(self)
|
||||
)
|
||||
|
||||
def __sub__(self, other: Qube) -> Qube:
|
||||
return set_operations.operation(
|
||||
self, other, set_operations.SetOperation.DIFFERENCE, type(self)
|
||||
)
|
||||
|
||||
def __xor__(self, other: Qube) -> Qube:
|
||||
return set_operations.operation(
|
||||
self, other, set_operations.SetOperation.SYMMETRIC_DIFFERENCE, type(self)
|
||||
)
|
||||
|
||||
def leaves(self) -> Iterable[dict[str, str]]:
|
||||
for value in self.values:
|
||||
if not self.children:
|
||||
yield {self.key: value}
|
||||
for child in self.children:
|
||||
for leaf in child.leaves():
|
||||
if self.key != "root":
|
||||
yield {self.key: value, **leaf}
|
||||
else:
|
||||
yield leaf
|
||||
|
||||
def leaf_nodes(self) -> "Iterable[tuple[dict[str, str], Qube]]":
|
||||
for value in self.values:
|
||||
if not self.children:
|
||||
yield ({self.key: value}, self)
|
||||
for child in self.children:
|
||||
for leaf in child.leaf_nodes():
|
||||
if self.key != "root":
|
||||
yield ({self.key: value, **leaf[0]}, leaf[1])
|
||||
else:
|
||||
yield leaf
|
||||
|
||||
def leaves_with_metadata(
|
||||
self, indices=()
|
||||
) -> Iterator[tuple[dict[str, str], dict[str, str | np.ndarray]]]:
|
||||
if self.key == "root":
|
||||
for c in self.children:
|
||||
yield from c.leaves_with_metadata(indices=())
|
||||
return
|
||||
|
||||
for index, value in enumerate(self.values):
|
||||
indexed_metadata = {
|
||||
k: vs[indices + (index,)] for k, vs in self.metadata.items()
|
||||
}
|
||||
indexed_metadata = {
|
||||
k: v.item() if v.shape == () else v for k, v in indexed_metadata.items()
|
||||
}
|
||||
if not self.children:
|
||||
yield {self.key: value}, indexed_metadata
|
||||
|
||||
for child in self.children:
|
||||
for leaf, metadata in child.leaves_with_metadata(
|
||||
indices=indices + (index,)
|
||||
):
|
||||
if self.key != "root":
|
||||
yield {self.key: value, **leaf}, metadata | indexed_metadata
|
||||
else:
|
||||
yield leaf, metadata
|
||||
|
||||
def datacubes(self) -> Iterable[dict[str, Any | list[Any]]]:
|
||||
def to_list_of_cubes(node: Qube) -> Iterable[dict[str, Any | list[Any]]]:
|
||||
if node.key == "root":
|
||||
for c in node.children:
|
||||
yield from to_list_of_cubes(c)
|
||||
|
||||
else:
|
||||
if not node.children:
|
||||
yield {node.key: list(node.values)}
|
||||
|
||||
for c in node.children:
|
||||
for sub_cube in to_list_of_cubes(c):
|
||||
yield {node.key: list(node.values)} | sub_cube
|
||||
|
||||
return to_list_of_cubes(self)
|
||||
|
||||
def __getitem__(self, args) -> Qube:
|
||||
if isinstance(args, str):
|
||||
specifiers = args.split(",")
|
||||
current = self
|
||||
for specifier in specifiers:
|
||||
key, values_str = specifier.split("=")
|
||||
values = values_str.split("/")
|
||||
for c in current.children:
|
||||
if c.key == key and set(values) == set(c.values):
|
||||
current = c
|
||||
break
|
||||
else:
|
||||
raise KeyError(
|
||||
f"Key '{key}' not found in children of '{current.key}', available keys are {[c.key for c in current.children]}"
|
||||
)
|
||||
return Qube.make_root(current.children)
|
||||
|
||||
elif isinstance(args, tuple) and len(args) == 2:
|
||||
key, value = args
|
||||
for c in self.children:
|
||||
if c.key == key and value in c.values:
|
||||
return Qube.make_root(c.children)
|
||||
raise KeyError(f"Key '{key}' not found in children of {self.key}")
|
||||
else:
|
||||
raise ValueError(f"Unknown key type {args}")
|
||||
|
||||
@cached_property
|
||||
def n_leaves(self) -> int:
|
||||
# This line makes the equation q.n_leaves + r.n_leaves == (q | r).n_leaves true if q and r have no overlap
|
||||
if self.key == "root" and not self.children:
|
||||
return 0
|
||||
return len(self.values) * (
|
||||
sum(c.n_leaves for c in self.children) if self.children else 1
|
||||
)
|
||||
|
||||
@cached_property
|
||||
def n_nodes(self) -> int:
|
||||
if self.key == "root" and not self.children:
|
||||
return 0
|
||||
return 1 + sum(c.n_nodes for c in self.children)
|
||||
|
||||
def transform(self, func: "Callable[[Qube], Qube | Iterable[Qube]]") -> Qube:
|
||||
"""
|
||||
Call a function on every node of the Qube, return one or more nodes.
|
||||
If multiple nodes are returned they each get a copy of the (transformed) children of the original node.
|
||||
Any changes to the children of a node will be ignored.
|
||||
"""
|
||||
|
||||
def transform(node: Qube) -> list[Qube]:
|
||||
children = tuple(sorted(cc for c in node.children for cc in transform(c)))
|
||||
new_nodes = func(node)
|
||||
if isinstance(new_nodes, Qube):
|
||||
new_nodes = [new_nodes]
|
||||
|
||||
return [new_node.replace(children=children) for new_node in new_nodes]
|
||||
|
||||
children = tuple(cc for c in self.children for cc in transform(c))
|
||||
return self.replace(children=children)
|
||||
|
||||
def remove_by_key(self, keys: str | list[str]):
|
||||
_keys: list[str] = keys if isinstance(keys, list) else [keys]
|
||||
|
||||
def remove_key(node: Qube) -> Qube:
|
||||
children: list[Qube] = []
|
||||
for c in node.children:
|
||||
if c.key in _keys:
|
||||
grandchildren = tuple(sorted(remove_key(cc) for cc in c.children))
|
||||
grandchildren = remove_key(Qube.make_root(grandchildren)).children
|
||||
children.extend(grandchildren)
|
||||
else:
|
||||
children.append(remove_key(c))
|
||||
|
||||
return node.replace(children=tuple(sorted(children)))
|
||||
|
||||
return remove_key(self).compress()
|
||||
|
||||
def convert_dtypes(self, converters: dict[str, Callable[[Any], Any]]):
|
||||
def convert(node: Qube) -> Qube:
|
||||
if node.key in converters:
|
||||
converter = converters[node.key]
|
||||
values = [converter(v) for v in node.values]
|
||||
new_node = node.replace(values=QEnum(values))
|
||||
return new_node
|
||||
return node
|
||||
|
||||
return self.transform(convert)
|
||||
|
||||
def select(
|
||||
self,
|
||||
selection: dict[str, str | list[str] | Callable[[Any], bool]],
|
||||
mode: Literal["strict", "relaxed"] = "relaxed",
|
||||
consume=False,
|
||||
) -> Qube:
|
||||
# Find any bare str values and replace them with [str]
|
||||
_selection: dict[str, list[str] | Callable[[Any], bool]] = {}
|
||||
for k, v in selection.items():
|
||||
if isinstance(v, list):
|
||||
_selection[k] = v
|
||||
elif callable(v):
|
||||
_selection[k] = v
|
||||
else:
|
||||
_selection[k] = [v]
|
||||
|
||||
def not_none(xs):
|
||||
return tuple(x for x in xs if x is not None)
|
||||
|
||||
def select(
|
||||
node: Qube,
|
||||
selection: dict[str, list[str] | Callable[[Any], bool]],
|
||||
matched: bool,
|
||||
) -> Qube | None:
|
||||
# If this node has no children but there are still parts of the request
|
||||
# that have not been consumed, then prune this whole branch
|
||||
if consume and not node.children and selection:
|
||||
return None
|
||||
|
||||
# If the key isn't in the selection then what we do depends on the mode:
|
||||
# In strict mode we just stop here
|
||||
# In next_level mode we include the next level down so you can tell what keys to add next
|
||||
# In relaxed mode we skip the key if it's not in the request and carry on
|
||||
if node.key not in selection:
|
||||
if mode == "strict":
|
||||
return None
|
||||
|
||||
elif mode == "next_level":
|
||||
return node.replace(
|
||||
children=(),
|
||||
metadata=self.metadata
|
||||
| {"is_leaf": np.array([not bool(node.children)])},
|
||||
)
|
||||
|
||||
elif mode == "relaxed":
|
||||
pass
|
||||
else:
|
||||
raise ValueError(f"Unknown mode argument {mode}")
|
||||
|
||||
# If the key IS in the selection then check if the values match
|
||||
if node.key in _selection:
|
||||
# If the key is specified, check if any of the values match
|
||||
selection_criteria = _selection[node.key]
|
||||
if callable(selection_criteria):
|
||||
values = QEnum((c for c in node.values if selection_criteria(c)))
|
||||
elif isinstance(selection_criteria, list):
|
||||
values = QEnum((c for c in selection_criteria if c in node.values))
|
||||
else:
|
||||
raise ValueError(f"Unknown selection type {selection_criteria}")
|
||||
|
||||
# Here modes don't matter because we've explicitly filtered on this key and found nothing
|
||||
if not values:
|
||||
return None
|
||||
|
||||
matched = True
|
||||
node = node.replace(values=values)
|
||||
|
||||
if consume:
|
||||
selection = {k: v for k, v in selection.items() if k != node.key}
|
||||
|
||||
# Prune nodes that have had all their children pruned
|
||||
new_children = not_none(
|
||||
select(c, selection, matched) for c in node.children
|
||||
)
|
||||
|
||||
if node.children and not new_children:
|
||||
return None
|
||||
|
||||
metadata = dict(node.metadata)
|
||||
|
||||
if mode == "next_level":
|
||||
metadata["is_leaf"] = np.array([not bool(node.children)])
|
||||
|
||||
return node.replace(
|
||||
children=new_children,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
return self.replace(
|
||||
children=not_none(
|
||||
select(c, _selection, matched=False) for c in self.children
|
||||
)
|
||||
)
|
||||
|
||||
def span(self, key: str) -> list[str]:
|
||||
"""
|
||||
Search the whole tree for any value that a given key takes anywhere.
|
||||
"""
|
||||
this = set(self.values) if self.key == key else set()
|
||||
return sorted(this | set(v for c in self.children for v in c.span(key)))
|
||||
|
||||
def axes(self) -> dict[str, set[str]]:
|
||||
"""
|
||||
Return a dictionary of all the spans of the keys in the qube.
|
||||
"""
|
||||
axes = defaultdict(set)
|
||||
for c in self.children:
|
||||
for k, v in c.axes().items():
|
||||
axes[k].update(v)
|
||||
if self.key != "root":
|
||||
axes[self.key].update(self.values)
|
||||
return dict(axes)
|
||||
|
||||
def axes_info(self, depth=0) -> dict[str, AxisInfo]:
|
||||
axes = defaultdict(
|
||||
lambda: AxisInfo(key="", type=str, depths=set(), values=set())
|
||||
)
|
||||
for c in self.children:
|
||||
for k, info in c.axes_info(depth=depth + 1).items():
|
||||
axes[k].combine(info)
|
||||
|
||||
if self.key != "root":
|
||||
axes[self.key].combine(
|
||||
AxisInfo(
|
||||
key=self.key,
|
||||
type=type(next(iter(self.values))),
|
||||
depths={depth},
|
||||
values=set(self.values),
|
||||
)
|
||||
)
|
||||
|
||||
return dict(axes)
|
||||
|
||||
@cached_property
|
||||
def structural_hash(self) -> int:
|
||||
"""
|
||||
This hash takes into account the key, values and children's key values recursively.
|
||||
Because nodes are immutable, we only need to compute this once.
|
||||
"""
|
||||
|
||||
def hash_node(node: Qube) -> int:
|
||||
return hash(
|
||||
(node.key, node.values, tuple(c.structural_hash for c in node.children))
|
||||
)
|
||||
|
||||
return hash_node(self)
|
||||
|
||||
def compress(self) -> Qube:
|
||||
"""
|
||||
This method is quite computationally heavy because of trees like this:
|
||||
root, class=d1, generation=1
|
||||
├── time=0600, many identical keys, param=8,78,79
|
||||
├── time=0600, many identical keys, param=8,78,79
|
||||
└── time=0600, many identical keys, param=8,78,79
|
||||
This tree compresses down to a single branch.
|
||||
|
||||
"""
|
||||
|
||||
def union(a: Qube, b: Qube) -> Qube:
|
||||
b = type(self).make_root(children=(b,))
|
||||
out = set_operations.operation(
|
||||
a, b, set_operations.SetOperation.UNION, type(self)
|
||||
)
|
||||
return out
|
||||
|
||||
new_children = [c.compress() for c in self.children]
|
||||
if len(new_children) > 1:
|
||||
new_children = list(
|
||||
functools.reduce(union, new_children, Qube.empty()).children
|
||||
)
|
||||
|
||||
return self.replace(children=tuple(sorted(new_children)))
|
||||
|
||||
def add_metadata(self, **kwargs: dict[str, Any]):
|
||||
metadata = {
|
||||
k: np.array(
|
||||
[
|
||||
v,
|
||||
]
|
||||
)
|
||||
for k, v in kwargs.items()
|
||||
}
|
||||
return self.replace(metadata=metadata)
|
||||
|
||||
def strip_metadata(self) -> Qube:
|
||||
def strip(node):
|
||||
return node.replace(metadata=frozendict({}))
|
||||
|
||||
return self.transform(strip)
|
||||
|
||||
def display(self):
|
||||
_display(self)
|
4
src/python/qubed/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
from . import protobuf
|
||||
from .Qube import Qube
|
||||
|
||||
__all__ = ["Qube", "protobuf"]
|
124
src/python/qubed/__main__.py
Normal file
@ -0,0 +1,124 @@
|
||||
import json
|
||||
import time
|
||||
|
||||
import click
|
||||
import psutil
|
||||
from rich.console import Console
|
||||
from rich.layout import Layout
|
||||
from rich.live import Live
|
||||
from rich.panel import Panel
|
||||
from rich.spinner import Spinner
|
||||
from rich.text import Text
|
||||
|
||||
from qubed import Qube
|
||||
from qubed.convert import parse_fdb_list
|
||||
|
||||
console = Console(stderr=True)
|
||||
process = psutil.Process()
|
||||
|
||||
PRINT_INTERVAL = 0.25
|
||||
|
||||
|
||||
@click.group()
|
||||
def main():
|
||||
"""Command-line tool for working with trees."""
|
||||
pass
|
||||
|
||||
|
||||
@main.command()
|
||||
@click.option(
|
||||
"--input",
|
||||
type=click.File("r"),
|
||||
default="-",
|
||||
help="Specify the input file (default: standard input).",
|
||||
)
|
||||
@click.option(
|
||||
"--output",
|
||||
type=click.File("w"),
|
||||
default="-",
|
||||
help="Specify the output file (default: standard output).",
|
||||
)
|
||||
@click.option(
|
||||
"--from",
|
||||
"from_format",
|
||||
type=click.Choice(["fdb", "mars"]),
|
||||
default="fdb",
|
||||
help="Specify the input format: fdb (fdb list --porcelain) or mars (mars list).",
|
||||
)
|
||||
@click.option(
|
||||
"--to",
|
||||
"to_format",
|
||||
type=click.Choice(["text", "html", "json"]),
|
||||
default="text",
|
||||
help="Specify the output format: text, html, json.",
|
||||
)
|
||||
def convert(input, output, from_format, to_format):
|
||||
"""Convert trees from one format to another."""
|
||||
q = Qube.empty()
|
||||
t = time.time()
|
||||
i0 = 0
|
||||
n0 = 0
|
||||
depth = 5
|
||||
log = Text()
|
||||
summary = Layout()
|
||||
summary.split_column(
|
||||
Layout(name="upper"),
|
||||
Layout(name="qube"),
|
||||
)
|
||||
summary["upper"].split_row(
|
||||
Layout(name="performance"),
|
||||
Layout(log, name="log"),
|
||||
)
|
||||
spinner = Spinner("aesthetic", text="Performance", speed=0.3)
|
||||
|
||||
with Live(summary, auto_refresh=False, transient=True, console=console) as live:
|
||||
for i, datacube in enumerate(parse_fdb_list(input)):
|
||||
new_branch = Qube.from_datacube(datacube)
|
||||
q = q | new_branch
|
||||
|
||||
if time.time() - t > PRINT_INTERVAL:
|
||||
tree = q.__str__(depth=depth)
|
||||
if tree.count("\n") > 20:
|
||||
depth -= 1
|
||||
if tree.count("\n") < 5:
|
||||
depth += 1
|
||||
|
||||
summary["performance"].update(
|
||||
Panel(
|
||||
Text.assemble(
|
||||
f"The Qube has {q.n_leaves} leaves and {q.n_nodes} internal nodes so far.\n",
|
||||
f"{(i - i0) / (time.time() - t) / PRINT_INTERVAL:.0f} lines per second. ",
|
||||
f"{(q.n_leaves - n0) / (time.time() - t):.0f} leaves per second.\n",
|
||||
f"Memory usage: {process.memory_info().rss / 1024 / 1024:.0f} MB\n",
|
||||
),
|
||||
title=spinner.render(time.time()),
|
||||
border_style="blue",
|
||||
)
|
||||
)
|
||||
summary["qube"].update(
|
||||
Panel(tree, title=f"Qube (depth {depth})", border_style="blue")
|
||||
)
|
||||
summary["log"].update(
|
||||
Panel(
|
||||
f"{datacube}", border_style="blue", title="Last Datacube Added"
|
||||
)
|
||||
)
|
||||
live.refresh()
|
||||
i0 = i
|
||||
n0 = q.n_leaves
|
||||
t = time.time()
|
||||
|
||||
if to_format == "text":
|
||||
output_content = str(q)
|
||||
elif to_format == "json":
|
||||
output_content = json.dumps(q.to_json())
|
||||
elif to_format == "html":
|
||||
output_content = q.html().html
|
||||
else:
|
||||
output_content = str(q)
|
||||
|
||||
output.write(output_content)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
29
src/python/qubed/convert.py
Normal file
@ -0,0 +1,29 @@
|
||||
def parse_key_value_pairs(text: str):
|
||||
result = {}
|
||||
text = text.replace("}{", ",") # Replace segment separators
|
||||
text = (
|
||||
text.replace("{", "").replace("}", "").strip()
|
||||
) # Remove leading/trailing braces
|
||||
|
||||
for segment in text.split(","):
|
||||
if "=" not in segment:
|
||||
print(segment)
|
||||
key, values_str = segment.split(
|
||||
"=", 1
|
||||
) # Ensure split only happens at first "="
|
||||
values = values_str.split("/")
|
||||
result[key] = values
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def parse_fdb_list(f):
|
||||
for line in f.readlines():
|
||||
# Handle fdb list normal
|
||||
if line.startswith("{"):
|
||||
yield parse_key_value_pairs(line)
|
||||
|
||||
# handle fdb list --compact
|
||||
if line.startswith("retrieve,") and not line.startswith("retrieve,\n"):
|
||||
line = line[9:]
|
||||
yield parse_key_value_pairs(line)
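# Hedged examples of the two line formats handled above (keys/values are illustrative):
#   fdb list:           {class=od,expver=0001,stream=oper}{param=2t,levtype=sfc}
#   fdb list --compact: retrieve,class=od,expver=0001,stream=oper,param=2t/10u,levtype=sfc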
|
43
src/python/qubed/metadata.py
Normal file
@ -0,0 +1,43 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Iterator
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .value_types import QEnum
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .Qube import Qube
|
||||
|
||||
|
||||
def make_node(
|
||||
cls,
|
||||
key: str,
|
||||
values: Iterator,
|
||||
shape: list[int],
|
||||
children: tuple[Qube, ...],
|
||||
metadata: dict[str, np.ndarray] | None = None,
|
||||
):
|
||||
return cls.make_node(
|
||||
key=key,
|
||||
values=QEnum(values),
|
||||
metadata={k: np.array(v).reshape(shape) for k, v in metadata.items()}
|
||||
if metadata is not None
|
||||
else {},
|
||||
children=children,
|
||||
)
|
||||
|
||||
|
||||
def from_nodes(cls, nodes, add_root=True):
|
||||
shape = [len(n["values"]) for n in nodes.values()]
|
||||
nodes = nodes.items()
|
||||
*nodes, (key, info) = nodes
|
||||
root = make_node(cls, shape=shape, children=(), key=key, **info)
|
||||
|
||||
for key, info in reversed(nodes):
|
||||
shape.pop()
|
||||
root = make_node(cls, shape=shape, children=(root,), key=key, **info)
|
||||
|
||||
if add_root:
|
||||
return cls.make_root(children=(root,))
|
||||
return root
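# Hedged example of the expected `nodes` layout (keys and values are illustrative):
#   Qube.from_nodes({
#       "class":  {"values": ["od"]},
#       "expver": {"values": ["0001", "0002"]},
#   })
# builds the linear chain root -> class=od -> expver=0001/0002, with any supplied
# "metadata" arrays reshaped to one axis per node along the chain.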
|
0
src/python/qubed/protobuf/__init__.py
Normal file
109
src/python/qubed/protobuf/adapters.py
Normal file
@ -0,0 +1,109 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
from frozendict import frozendict
|
||||
|
||||
from ..value_types import QEnum
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore",
|
||||
"Protobuf gencode version",
|
||||
UserWarning,
|
||||
"google.protobuf.runtime_version",
|
||||
)
|
||||
from . import qube_pb2
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..Qube import Qube
|
||||
|
||||
|
||||
def _ndarray_to_proto(arr: np.ndarray) -> qube_pb2.NdArray:
|
||||
"""np.ndarray → NdArray message"""
|
||||
return qube_pb2.NdArray(
|
||||
shape=list(arr.shape),
|
||||
dtype=str(arr.dtype),
|
||||
raw=arr.tobytes(order="C"),
|
||||
)
|
||||
|
||||
|
||||
def _ndarray_from_proto(msg: qube_pb2.NdArray) -> np.ndarray:
|
||||
"""NdArray message → np.ndarray (immutable view)"""
|
||||
return np.frombuffer(msg.raw, dtype=msg.dtype).reshape(tuple(msg.shape))
|
||||
|
||||
|
||||
def _py_to_valuegroup(value: list[str] | np.ndarray) -> qube_pb2.ValueGroup:
|
||||
"""Accept str-sequence *or* ndarray and return ValueGroup."""
|
||||
vg = qube_pb2.ValueGroup()
|
||||
if isinstance(value, np.ndarray):
|
||||
vg.tensor.CopyFrom(_ndarray_to_proto(value))
|
||||
else:
|
||||
vg.s.items.extend(value)
|
||||
return vg
|
||||
|
||||
|
||||
def _valuegroup_to_py(vg: qube_pb2.ValueGroup) -> list[str] | np.ndarray:
|
||||
"""ValueGroup → list[str] *or* ndarray"""
|
||||
arm = vg.WhichOneof("payload")
|
||||
if arm == "tensor":
|
||||
return _ndarray_from_proto(vg.tensor)
|
||||
|
||||
return QEnum(vg.s.items)
|
||||
|
||||
|
||||
def _py_to_metadatagroup(value: np.ndarray) -> qube_pb2.MetadataGroup:
|
||||
"""Accept str-sequence *or* ndarray and return ValueGroup."""
|
||||
vg = qube_pb2.MetadataGroup()
|
||||
if not isinstance(value, np.ndarray):
|
||||
value = np.array([value])
|
||||
|
||||
vg.tensor.CopyFrom(_ndarray_to_proto(value))
|
||||
return vg
|
||||
|
||||
|
||||
def _metadatagroup_to_py(vg: qube_pb2.MetadataGroup) -> np.ndarray:
|
||||
"""ValueGroup → list[str] *or* ndarray"""
|
||||
arm = vg.WhichOneof("payload")
|
||||
if arm == "tensor":
|
||||
return _ndarray_from_proto(vg.tensor)
|
||||
|
||||
raise ValueError(f"Unknown arm {arm}")
|
||||
|
||||
|
||||
def _qube_to_proto(q: Qube) -> qube_pb2.Qube:
|
||||
"""Frozen Qube dataclass → protobuf Qube message (new object)."""
|
||||
return qube_pb2.Qube(
|
||||
key=q.key,
|
||||
values=_py_to_valuegroup(q.values),
|
||||
metadata={k: _py_to_metadatagroup(v) for k, v in q.metadata.items()},
|
||||
children=[_qube_to_proto(c) for c in q.children],
|
||||
is_root=q.is_root,
|
||||
)
|
||||
|
||||
|
||||
def qube_to_proto(q: Qube) -> bytes:
|
||||
return _qube_to_proto(q).SerializeToString()
|
||||
|
||||
|
||||
def _proto_to_qube(cls: type, msg: qube_pb2.Qube) -> Qube:
|
||||
"""protobuf Qube message → frozen Qube dataclass (new object)."""
|
||||
|
||||
return cls.make_node(
|
||||
key=msg.key,
|
||||
values=_valuegroup_to_py(msg.values),
|
||||
metadata=frozendict(
|
||||
{k: _metadatagroup_to_py(v) for k, v in msg.metadata.items()}
|
||||
),
|
||||
children=tuple(_proto_to_qube(cls, c) for c in msg.children),
|
||||
is_root=msg.is_root,
|
||||
)
|
||||
|
||||
|
||||
def proto_to_qube(cls: type, wire: bytes) -> Qube:
|
||||
msg = qube_pb2.Qube()
|
||||
msg.ParseFromString(wire)
|
||||
return _proto_to_qube(cls, msg)
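# Round-trip sketch (hedged): serialising a qube and parsing it back should give an
# equal qube, e.g.
#   wire = qube_to_proto(q)           # Qube  -> bytes
#   q2   = proto_to_qube(Qube, wire)  # bytes -> Qube
#   assert q2 == q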
|
45
src/python/qubed/protobuf/qube_pb2.py
Normal file
@ -0,0 +1,45 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
# NO CHECKED-IN PROTOBUF GENCODE
|
||||
# source: qube.proto
|
||||
# Protobuf Python Version: 5.29.0
|
||||
"""Generated protocol buffer code."""
|
||||
|
||||
from google.protobuf import descriptor as _descriptor
|
||||
from google.protobuf import descriptor_pool as _descriptor_pool
|
||||
from google.protobuf import runtime_version as _runtime_version
|
||||
from google.protobuf import symbol_database as _symbol_database
|
||||
from google.protobuf.internal import builder as _builder
|
||||
|
||||
_runtime_version.ValidateProtobufRuntimeVersion(
|
||||
_runtime_version.Domain.PUBLIC, 5, 29, 0, "", "qube.proto"
|
||||
)
|
||||
# @@protoc_insertion_point(imports)
|
||||
|
||||
_sym_db = _symbol_database.Default()
|
||||
|
||||
|
||||
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
||||
b'\n\nqube.proto"4\n\x07NdArray\x12\r\n\x05shape\x18\x01 \x03(\x03\x12\r\n\x05\x64type\x18\x02 \x01(\t\x12\x0b\n\x03raw\x18\x03 \x01(\x0c"\x1c\n\x0bStringGroup\x12\r\n\x05items\x18\x01 \x03(\t"N\n\nValueGroup\x12\x19\n\x01s\x18\x01 \x01(\x0b\x32\x0c.StringGroupH\x00\x12\x1a\n\x06tensor\x18\x02 \x01(\x0b\x32\x08.NdArrayH\x00\x42\t\n\x07payload"6\n\rMetadataGroup\x12\x1a\n\x06tensor\x18\x01 \x01(\x0b\x32\x08.NdArrayH\x00\x42\t\n\x07payload"\xd1\x01\n\x04Qube\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x1b\n\x06values\x18\x02 \x01(\x0b\x32\x0b.ValueGroup\x12%\n\x08metadata\x18\x03 \x03(\x0b\x32\x13.Qube.MetadataEntry\x12\r\n\x05\x64type\x18\x04 \x01(\t\x12\x17\n\x08\x63hildren\x18\x05 \x03(\x0b\x32\x05.Qube\x12\x0f\n\x07is_root\x18\x06 \x01(\x08\x1a?\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x1d\n\x05value\x18\x02 \x01(\x0b\x32\x0e.MetadataGroup:\x02\x38\x01\x62\x06proto3'
|
||||
)
|
||||
|
||||
_globals = globals()
|
||||
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
||||
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "qube_pb2", _globals)
|
||||
if not _descriptor._USE_C_DESCRIPTORS:
|
||||
DESCRIPTOR._loaded_options = None
|
||||
_globals["_QUBE_METADATAENTRY"]._loaded_options = None
|
||||
_globals["_QUBE_METADATAENTRY"]._serialized_options = b"8\001"
|
||||
_globals["_NDARRAY"]._serialized_start = 14
|
||||
_globals["_NDARRAY"]._serialized_end = 66
|
||||
_globals["_STRINGGROUP"]._serialized_start = 68
|
||||
_globals["_STRINGGROUP"]._serialized_end = 96
|
||||
_globals["_VALUEGROUP"]._serialized_start = 98
|
||||
_globals["_VALUEGROUP"]._serialized_end = 176
|
||||
_globals["_METADATAGROUP"]._serialized_start = 178
|
||||
_globals["_METADATAGROUP"]._serialized_end = 232
|
||||
_globals["_QUBE"]._serialized_start = 235
|
||||
_globals["_QUBE"]._serialized_end = 444
|
||||
_globals["_QUBE_METADATAENTRY"]._serialized_start = 381
|
||||
_globals["_QUBE_METADATAENTRY"]._serialized_end = 444
|
||||
# @@protoc_insertion_point(module_scope)
|
0  src/python/qubed/py.typed  Normal file
464  src/python/qubed/set_operations.py  Normal file
@@ -0,0 +1,464 @@
|
||||
"""
|
||||
# Set Operations
|
||||
|
||||
The core of this is the observation that for two sets A and B, if we compute (A - B), (A ∩ B) and (B - A)
|
||||
then we can get the other operations by taking unions of the above three objects.
|
||||
Union: All of them
|
||||
Intersection: Just take A ∩ B
|
||||
Difference: Take either A - B or B - A
|
||||
Symmetric Difference (XOR): Take A - B and B - A
|
||||
|
||||
We start with a shallow implementation of this algorithm that only deals with a pair of nodes, not the whole tree:
|
||||
|
||||
shallow_set_operation(A: Qube, B: Qube) -> SetOpsResult
|
||||
|
||||
This takes two qubes and (morally) returns (A - B), (A ∩ B) and (B - A) but only for the values and metadata at the top level.
|
||||
|
||||
For technical reasons that will become clear, we actually return a struct with two copies of (A ∩ B). One has the metadata and children from A (call it A'), and the other has them from B (call it B'). This is relevant when we extend the shallow algorithm to work with a whole tree, because we will recurse and compute the set operation for each pair of the children of A' and B'.
|
||||
|
||||
NB: Currently there are two kinds of values: QEnums, which store a list of values, and Wildcards, which 'match with everything'. shallow_set_operation checks the type of the values and dispatches to different methods depending on the combination of types it finds.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
# Prevent circular imports while allowing the type checker to know what Qube is
|
||||
from typing import TYPE_CHECKING, Any, Iterable
|
||||
|
||||
import numpy as np
|
||||
from frozendict import frozendict
|
||||
|
||||
from .value_types import QEnum, ValueGroup, WildcardGroup
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .Qube import Qube
|
||||
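# A minimal sketch of the decomposition described in the module docstring above,
# using plain Python sets (illustrative only; the real code works on Qube nodes,
# their values and their metadata, not bare sets):
def _set_decomposition_example():
    A, B = {"0001", "0002", "0003"}, {"0002", "0003", "0004"}
    only_A, intersection, only_B = A - B, A & B, B - A
    assert only_A | intersection | only_B == A | B  # union
    assert intersection == A & B                    # intersection
    assert only_A == A - B                          # difference
    assert only_A | only_B == A ^ B                 # symmetric difference (XOR)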
|
||||
|
||||
class SetOperation(Enum):
|
||||
"Map from set operations to which combination of (A - B), (A ∩ B), (B - A) we need."
|
||||
|
||||
UNION = (1, 1, 1)
|
||||
INTERSECTION = (0, 1, 0)
|
||||
DIFFERENCE = (1, 0, 0)
|
||||
SYMMETRIC_DIFFERENCE = (1, 0, 1)
|
||||
|
||||
|
||||
@dataclass(eq=True, frozen=True)
|
||||
class ValuesIndices:
|
||||
"Helper class to hold the values and indices from a node."
|
||||
|
||||
values: ValueGroup
|
||||
indices: tuple[int, ...]
|
||||
|
||||
@classmethod
|
||||
def from_values(cls, values: ValueGroup):
|
||||
return cls(values=values, indices=tuple(range(len(values))))
|
||||
|
||||
@classmethod
|
||||
def empty(cls):
|
||||
return cls(values=QEnum([]), indices=())
|
||||
|
||||
def enumerate(self) -> Iterable[tuple[int, Any]]:
|
||||
return zip(self.indices, self.values)
|
||||
|
||||
|
||||
def get_indices(
|
||||
metadata: frozendict[str, np.ndarray], indices: tuple[int, ...]
|
||||
) -> frozendict[str, np.ndarray]:
|
||||
"Given a metadata dict and some indices, return a new metadata dict with only the values indexed by the indices"
|
||||
return frozendict(
|
||||
{k: v[..., indices] for k, v in metadata.items() if isinstance(v, np.ndarray)}
|
||||
)
|
||||
|
||||
|
||||
@dataclass(eq=True, frozen=True)
|
||||
class SetOpResult:
|
||||
"""
|
||||
Given two sets A and B, all possible set operations can be constructed from A - B, A ∩ B, B - A
|
||||
That is, what's only in A, the intersection and what's only in B
|
||||
However, because we need to recurse on children, we actually return two intersection nodes:
|
||||
only_A is a qube with:
|
||||
The values in A but not in B
|
||||
The metadata corresponding to these values
|
||||
All the children A had
|
||||
|
||||
intersection_A is a qube with:
|
||||
The values that intersected with B
|
||||
The metadata from that intersection
|
||||
All the children A had
|
||||
|
||||
And vice versa for only_B and intersection_B
|
||||
"""
|
||||
|
||||
only_A: ValuesIndices
|
||||
intersection_A: ValuesIndices
|
||||
intersection_B: ValuesIndices
|
||||
only_B: ValuesIndices
|
||||
|
||||
|
||||
def shallow_qenum_set_operation(A: ValuesIndices, B: ValuesIndices) -> SetOpResult:
|
||||
"""
|
||||
For two sets of values, partition the overlap into four groups:
|
||||
only_A: values and indices of values that are in A but not B
|
||||
intersection_A: values and indices of values that are in both A and B
|
||||
And vice versa for only_B and intersection_B.
|
||||
|
||||
Note that intersection_A and intersection_B contain the same values but the indices are different.
|
||||
"""
|
||||
|
||||
# create four groups that map value -> index
|
||||
only_A: dict[Any, int] = {val: i for i, val in A.enumerate()}
|
||||
only_B: dict[Any, int] = {val: i for i, val in B.enumerate()}
|
||||
intersection_A: dict[Any, int] = {}
|
||||
intersection_B: dict[Any, int] = {}
|
||||
|
||||
# Go through all the values and move any that are in the intersection
|
||||
# to the corresponding group, keeping the indices
|
||||
for val in A.values:
|
||||
if val in B.values:
|
||||
intersection_A[val] = only_A.pop(val)
|
||||
intersection_B[val] = only_B.pop(val)
|
||||
|
||||
def package(values_indices: dict[Any, int]) -> ValuesIndices:
|
||||
return ValuesIndices(
|
||||
values=QEnum(list(values_indices.keys())),
|
||||
indices=tuple(values_indices.values()),
|
||||
)
|
||||
|
||||
return SetOpResult(
|
||||
only_A=package(only_A),
|
||||
only_B=package(only_B),
|
||||
intersection_A=package(intersection_A),
|
||||
intersection_B=package(intersection_B),
|
||||
)
|
||||
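# A quick illustration of the partitioning above (hypothetical values):
def _shallow_qenum_example():
    A = ValuesIndices.from_values(QEnum(["a", "b", "c"]))
    B = ValuesIndices.from_values(QEnum(["b", "c", "d"]))
    result = shallow_qenum_set_operation(A, B)
    # result.only_A holds "a" and result.only_B holds "d"; both intersections hold
    # "b" and "c", but their indices point back into A and B respectively.
    return result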
|
||||
|
||||
def shallow_wildcard_set_operation(A: ValuesIndices, B: ValuesIndices) -> SetOpResult:
|
||||
"""
|
||||
WildcardGroups behave as if they contain all the values of whatever they match against.
|
||||
For two wildcards we just return both.
|
||||
For A == wildcard and B == enum we have to be more careful:
|
||||
1. All of B is in the intersection, so only_B is empty.
|
||||
2. The wildcard may need to match against other things so only_A is A
|
||||
3. We return B in the intersection_B and intersection_A slot.
|
||||
|
||||
This last bit happens because the wildcard basically adopts the values of whatever it sees.
|
||||
"""
|
||||
# Two wildcard groups have full overlap.
|
||||
if isinstance(A.values, WildcardGroup) and isinstance(B.values, WildcardGroup):
|
||||
return SetOpResult(ValuesIndices.empty(), A, B, ValuesIndices.empty())
|
||||
|
||||
# If A is a wildcard matcher and B is not
|
||||
# then the intersection is everything from B
|
||||
if isinstance(A.values, WildcardGroup):
|
||||
return SetOpResult(A, B, B, ValuesIndices.empty())
|
||||
|
||||
# If B is a wildcard matcher and A is not
|
||||
# then the intersection is everything from A
|
||||
if isinstance(B.values, WildcardGroup):
|
||||
return SetOpResult(ValuesIndices.empty(), A, A, B)
|
||||
|
||||
raise NotImplementedError(
|
||||
f"One of {type(A.values)} and {type(B.values)} should be WildCardGroup"
|
||||
)
|
||||
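# Sketch of the wildcard case (hypothetical values): the wildcard adopts the
# values of whatever it is matched against.
def _shallow_wildcard_example():
    A = ValuesIndices.from_values(WildcardGroup())
    B = ValuesIndices.from_values(QEnum(["x", "y"]))
    result = shallow_wildcard_set_operation(A, B)
    # result.only_A is the wildcard (it may still match other things),
    # both intersections are B, and result.only_B is empty.
    return result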
|
||||
|
||||
def shallow_set_operation(
|
||||
A: ValuesIndices,
|
||||
B: ValuesIndices,
|
||||
) -> SetOpResult:
|
||||
if isinstance(A.values, QEnum) and isinstance(B.values, QEnum):
|
||||
return shallow_qenum_set_operation(A, B)
|
||||
|
||||
# WildcardGroups behave as if they contain all possible values.
|
||||
if isinstance(A.values, WildcardGroup) or isinstance(B.values, WildcardGroup):
|
||||
return shallow_wildcard_set_operation(A, B)
|
||||
|
||||
raise NotImplementedError(
|
||||
f"Set operations on values types {type(A.values)} and {type(B.values)} not yet implemented"
|
||||
)
|
||||
|
||||
|
||||
def operation(
|
||||
A: Qube, B: Qube, operation_type: SetOperation, node_type, depth=0
|
||||
) -> Qube | None:
|
||||
# print(f"operation({A}, {B})")
|
||||
assert A.key == B.key, (
|
||||
"The two Qube root nodes must have the same key to perform set operations,"
|
||||
f"would usually be two root nodes. They have {A.key} and {B.key} respectively"
|
||||
)
|
||||
node_key = A.key
|
||||
|
||||
assert A.is_root == B.is_root
|
||||
is_root = A.is_root
|
||||
|
||||
assert A.values == B.values, (
|
||||
f"The two Qube root nodes must have the same values to perform set operations {A.values = }, {B.values = }"
|
||||
)
|
||||
node_values = A.values
|
||||
|
||||
# Group the children of the two nodes by key
|
||||
nodes_by_key: defaultdict[str, tuple[list[Qube], list[Qube]]] = defaultdict(
|
||||
lambda: ([], [])
|
||||
)
|
||||
new_children: list[Qube] = []
|
||||
|
||||
# Sort out metadata into what can stay at this level and what must move down
|
||||
stayput_metadata: dict[str, np.ndarray] = {}
|
||||
pushdown_metadata_A: dict[str, np.ndarray] = {}
|
||||
pushdown_metadata_B: dict[str, np.ndarray] = {}
|
||||
for key in set(A.metadata.keys()) | set(B.metadata.keys()):
|
||||
if key not in A.metadata:
|
||||
pushdown_metadata_B[key] = B.metadata[key]
|
||||
continue
|
||||
|
||||
if key not in B.metadata:
|
||||
pushdown_metadata_A[key] = A.metadata[key]
|
||||
continue
|
||||
|
||||
A_val = A.metadata[key]
|
||||
B_val = B.metadata[key]
|
||||
if np.allclose(A_val, B_val):
|
||||
# print(f"{' ' * depth}Keeping metadata key '{key}' at this level")
|
||||
stayput_metadata[key] = A.metadata[key]
|
||||
else:
|
||||
# print(f"{' ' * depth}Pushing down metadata key '{key}' {A_val} {B_val}")
|
||||
pushdown_metadata_A[key] = A_val
|
||||
pushdown_metadata_B[key] = B_val
|
||||
|
||||
# Add all the metadata that needs to be pushed down to the child nodes
|
||||
# When pushing down the metadata we need to account for the fact it now affects more values
|
||||
# So expand the metadata entries from shape (a, b, ..., c) to (a, b, ..., c, d)
|
||||
# where d is the length of the node values
|
||||
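# e.g. (hypothetical shapes) a metadata array v of shape (2, 3) pushed down onto
# a child with 4 values becomes np.broadcast_to(v[..., np.newaxis], (2, 3, 4)).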
for node in A.children:
|
||||
N = len(node.values)
|
||||
meta = {
|
||||
k: np.broadcast_to(v[..., np.newaxis], v.shape + (N,))
|
||||
for k, v in pushdown_metadata_A.items()
|
||||
}
|
||||
node = node.replace(metadata=node.metadata | meta)
|
||||
nodes_by_key[node.key][0].append(node)
|
||||
|
||||
for node in B.children:
|
||||
N = len(node.values)
|
||||
meta = {
|
||||
k: np.broadcast_to(v[..., np.newaxis], v.shape + (N,))
|
||||
for k, v in pushdown_metadata_B.items()
|
||||
}
|
||||
node = node.replace(metadata=node.metadata | meta)
|
||||
nodes_by_key[node.key][1].append(node)
|
||||
|
||||
# print(f"{nodes_by_key = }")
|
||||
|
||||
# For every node group, perform the set operation
|
||||
for key, (A_nodes, B_nodes) in nodes_by_key.items():
|
||||
output = list(
|
||||
_operation(A_nodes, B_nodes, operation_type, node_type, depth + 1)
|
||||
)
|
||||
# print(f"{' '*depth}_operation {operation_type.name} {A_nodes} {B_nodes} out = [{output}]")
|
||||
new_children.extend(output)
|
||||
|
||||
# print(f"{' '*depth}operation {operation_type.name} [{A}] [{B}] new_children = [{new_children}]")
|
||||
|
||||
# If there are now no children as a result of the operation, return nothing.
|
||||
if (A.children or B.children) and not new_children:
|
||||
if A.key == "root":
|
||||
return node_type.make_root(children=())
|
||||
else:
|
||||
return None
|
||||
|
||||
# Whenever we modify children we should recompress them
|
||||
# But since `operation` is already recursive, we only need to compress this level not all levels
|
||||
# Hence we use the non-recursive _compress method
|
||||
new_children = list(compress_children(new_children))
|
||||
|
||||
# The values and key are the same so we just replace the children
|
||||
if A.key == "root":
|
||||
return node_type.make_root(
|
||||
children=new_children,
|
||||
metadata=stayput_metadata,
|
||||
)
|
||||
return node_type.make_node(
|
||||
key=node_key,
|
||||
values=node_values,
|
||||
children=new_children,
|
||||
metadata=stayput_metadata,
|
||||
is_root=is_root,
|
||||
)
|
||||
|
||||
|
||||
def _operation(
|
||||
A: list[Qube],
|
||||
B: list[Qube],
|
||||
operation_type: SetOperation,
|
||||
node_type,
|
||||
depth: int,
|
||||
) -> Iterable[Qube]:
|
||||
"""
|
||||
This operation assumes that we've found two nodes that match and now want to do a set operation on their children. Hence we take in two lists of child nodes all of which have the same key but different values.
|
||||
We then loop over all pairs of children from each list and compute the intersection.
|
||||
"""
|
||||
# print(f"_operation({A}, {B})")
|
||||
keep_only_A, keep_intersection, keep_only_B = operation_type.value
|
||||
|
||||
# We're going to progressively remove values from the starting nodes as we do intersections
|
||||
# So we make a node -> ValuesIndices mapping here for both a and b
|
||||
only_a: dict[Qube, ValuesIndices] = {
|
||||
n: ValuesIndices.from_values(n.values) for n in A
|
||||
}
|
||||
only_b: dict[Qube, ValuesIndices] = {
|
||||
n: ValuesIndices.from_values(n.values) for n in B
|
||||
}
|
||||
|
||||
def make_new_node(source: Qube, values_indices: ValuesIndices):
|
||||
return source.replace(
|
||||
values=values_indices.values,
|
||||
metadata=get_indices(source.metadata, values_indices.indices),
|
||||
)
|
||||
|
||||
# Iterate over all pairs (node_A, node_B) and perform the shallow set operation
|
||||
# Update our copy of the original node to remove anything that appears in an intersection
|
||||
for node_a in A:
|
||||
for node_b in B:
|
||||
set_ops_result = shallow_set_operation(only_a[node_a], only_b[node_b])
|
||||
|
||||
# Save reduced values back to nodes
|
||||
only_a[node_a] = set_ops_result.only_A
|
||||
only_b[node_b] = set_ops_result.only_B
|
||||
|
||||
if (
|
||||
set_ops_result.intersection_A.values
|
||||
and set_ops_result.intersection_B.values
|
||||
):
|
||||
result = operation(
|
||||
make_new_node(node_a, set_ops_result.intersection_A),
|
||||
make_new_node(node_b, set_ops_result.intersection_B),
|
||||
operation_type,
|
||||
node_type,
|
||||
depth=depth + 1,
|
||||
)
|
||||
if result is not None:
|
||||
# If we're doing a difference or xor we might want to throw away the intersection
|
||||
# However we can only do this once we get to the leaf nodes, otherwise we'll
|
||||
# throw away nodes too early!
|
||||
# Consider Qube(root, a=1, b=1/2) - Qube(root, a=1, b=1)
|
||||
# We can easily throw away the whole a node by accident here!
|
||||
if keep_intersection or result.children:
|
||||
yield result
|
||||
elif (
|
||||
not set_ops_result.intersection_A.values
|
||||
and not set_ops_result.intersection_B.values
|
||||
):
|
||||
continue
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Only one of set_ops_result.intersection_A and set_ops_result.intersection_B is None, I didn't think that could happen! {set_ops_result = }"
|
||||
)
|
||||
|
||||
if keep_only_A:
|
||||
for node, vi in only_a.items():
|
||||
if vi.values:
|
||||
yield make_new_node(node, vi)
|
||||
|
||||
if keep_only_B:
|
||||
for node, vi in only_b.items():
|
||||
if vi.values:
|
||||
yield make_new_node(node, vi)
|
||||
|
||||
|
||||
def compress_children(children: Iterable[Qube], depth=0) -> tuple[Qube, ...]:
|
||||
"""
|
||||
Helper method that only compresses a set of nodes and doesn't do it recursively.
|
||||
Used in Qubed.compress but also to maintain compression in the set operations above.
|
||||
"""
|
||||
# Take the set of new children and see if any have identical key, metadata and children
|
||||
# the values may differ and will be collapsed into a single node
|
||||
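# e.g. (hypothetical) two sibling nodes level=500 and level=850 with identical
# children collapse into a single node level=500/850.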
|
||||
identical_children = defaultdict(list)
|
||||
for child in children:
|
||||
# only care about the key and children of each node, ignore values
|
||||
h = hash((child.key, tuple((cc.structural_hash for cc in child.children))))
|
||||
identical_children[h].append(child)
|
||||
|
||||
# Now go through and create new compressed nodes for any groups that need collapsing
|
||||
new_children = []
|
||||
for child_list in identical_children.values():
|
||||
# If the group is size one just keep it
|
||||
if len(child_list) == 1:
|
||||
new_child = child_list.pop()
|
||||
|
||||
else:
|
||||
example = child_list[0]
|
||||
node_type = type(example)
|
||||
value_type = type(example.values)
|
||||
|
||||
assert all(isinstance(child.values, value_type) for child in child_list), (
|
||||
f"All nodes to be grouped must have the same value type, expected {value_type}"
|
||||
)
|
||||
|
||||
# We know the children of this group of nodes all have the same structure
|
||||
# but we still need to merge the metadata across them
|
||||
# children = example.children
|
||||
children = merge_metadata(child_list, example.depth)
|
||||
|
||||
# Do we need to recursively compress here?
|
||||
# children = compress_children(children, depth=depth+1)
|
||||
|
||||
if value_type is QEnum:
|
||||
values = QEnum(set(v for child in child_list for v in child.values))
|
||||
elif value_type is WildcardGroup:
|
||||
values = example.values
|
||||
else:
|
||||
raise ValueError(f"Unknown value type: {value_type}")
|
||||
|
||||
new_child = node_type.make_node(
|
||||
key=example.key,
|
||||
metadata=example.metadata,
|
||||
values=values,
|
||||
children=children,
|
||||
)
|
||||
|
||||
new_children.append(new_child)
|
||||
|
||||
return tuple(sorted(new_children, key=lambda n: ((n.key, n.values.min()))))
|
||||
|
||||
|
||||
def merge_metadata(qubes: list[Qube], axis) -> Iterable[Qube]:
|
||||
"""
|
||||
Given a list of qubes with identical structure,
|
||||
match up the children of each node and merge the metadata
|
||||
"""
|
||||
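# e.g. (hypothetical) merging two qubes whose matching children each carry a
# metadata array of shape (3,) along axis=0 yields one child with shape (6,).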
# Group the children of each qube and merge them
|
||||
# Exploit the fact that they have the same shape and ordering
|
||||
example = qubes[0]
|
||||
node_type = type(example)
|
||||
|
||||
for i in range(len(example.children)):
|
||||
group = [q.children[i] for q in qubes]
|
||||
group_example = group[0]
|
||||
assert len(set((c.structural_hash for c in group))) == 1
|
||||
|
||||
# Collect metadata by key
|
||||
metadata_groups = {
|
||||
k: [q.metadata[k] for q in group] for k in group_example.metadata.keys()
|
||||
}
|
||||
|
||||
# Concatenate the metadata together
|
||||
metadata: frozendict[str, np.ndarray] = frozendict(
|
||||
{
|
||||
k: np.concatenate(metadata_group, axis=axis)
|
||||
for k, metadata_group in metadata_groups.items()
|
||||
}
|
||||
)
|
||||
|
||||
group_children = merge_metadata(group, axis)
|
||||
yield node_type.make_node(
|
||||
key=group_example.key,
|
||||
metadata=metadata,
|
||||
values=group_example.values,
|
||||
children=group_children,
|
||||
)
|
271  src/python/qubed/tree_formatters.py  Normal file
@@ -0,0 +1,271 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Callable, Iterable
|
||||
|
||||
try:
|
||||
from IPython.display import display
|
||||
except ImportError:
|
||||
display = None
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .Qube import Qube
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class HTML:
|
||||
html: str
|
||||
|
||||
def _repr_html_(self):
|
||||
return self.html
|
||||
|
||||
|
||||
def summarize_node(
|
||||
node: Qube, collapse=False, max_summary_length=50, **kwargs
|
||||
) -> tuple[str, str, Qube]:
|
||||
"""
|
||||
Extracts a summarized representation of the node while collapsing single-child paths.
|
||||
Returns the summary string, the full path string, and the last node in the chain that has multiple children.
|
||||
"""
|
||||
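# e.g. (hypothetical) with collapse=True, a chain class=od -> expver=0001 -> param=2
# in which each node has a single child is summarised as "class=od, expver=0001, param=2".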
summaries = []
|
||||
paths = []
|
||||
|
||||
while True:
|
||||
summary = node.summary(**kwargs)
|
||||
|
||||
paths.append(summary)
|
||||
if len(summary) > max_summary_length:
|
||||
summary = summary[:max_summary_length] + "..."
|
||||
summaries.append(summary)
|
||||
if not collapse:
|
||||
break
|
||||
|
||||
# Move down if there's exactly one child, otherwise stop
|
||||
if len(node.children) != 1:
|
||||
break
|
||||
node = node.children[0]
|
||||
|
||||
# Add a "..." to represent nodes that we don't know about
|
||||
if (not node.children) and (not node.is_leaf):
|
||||
summaries.append("...")
|
||||
|
||||
return ", ".join(summaries), ",".join(paths), node
|
||||
|
||||
|
||||
def node_tree_to_string(node: Qube, prefix: str = "", depth=None) -> Iterable[str]:
|
||||
summary, path, node = summarize_node(node)
|
||||
|
||||
if depth is not None and depth <= 0:
|
||||
yield summary + " - ...\n"
|
||||
return
|
||||
# Special case for nodes with only a single child, this makes the printed representation more compact
|
||||
elif len(node.children) == 1:
|
||||
yield summary + ", "
|
||||
yield from node_tree_to_string(node.children[0], prefix, depth=depth)
|
||||
return
|
||||
else:
|
||||
yield summary + "\n"
|
||||
|
||||
for index, child in enumerate(node.children):
|
||||
connector = "└── " if index == len(node.children) - 1 else "├── "
|
||||
yield prefix + connector
|
||||
extension = " " if index == len(node.children) - 1 else "│ "
|
||||
yield from node_tree_to_string(
|
||||
child, prefix + extension, depth=depth - 1 if depth is not None else None
|
||||
)
|
||||
|
||||
|
||||
def summarize_node_html(
|
||||
node: Qube,
|
||||
collapse=False,
|
||||
max_summary_length=50,
|
||||
info: Callable[[Qube], str] | None = None,
|
||||
**kwargs,
|
||||
) -> tuple[str, Qube]:
|
||||
"""
|
||||
Extracts a summarized representation of the node while collapsing single-child paths.
|
||||
Returns the summary string and the last node in the chain that has multiple children.
|
||||
"""
|
||||
if info is None:
|
||||
|
||||
def info_func(node: Qube, /):
|
||||
return (
|
||||
# f"dtype: {node.dtype}\n"
|
||||
f"metadata: {dict(node.metadata)}\n"
|
||||
)
|
||||
else:
|
||||
info_func = info
|
||||
|
||||
summaries = []
|
||||
|
||||
while True:
|
||||
path = node.summary(**kwargs)
|
||||
summary = path
|
||||
|
||||
if len(summary) > max_summary_length:
|
||||
summary = summary[:max_summary_length] + "..."
|
||||
|
||||
info_string = info_func(node)
|
||||
|
||||
summary = f'<span class="qubed-node" data-path="{path}" title="{info_string}">{summary}</span>'
|
||||
summaries.append(summary)
|
||||
if not collapse:
|
||||
break
|
||||
|
||||
# Move down if there's exactly one child, otherwise stop
|
||||
if len(node.children) != 1:
|
||||
break
|
||||
node = node.children[0]
|
||||
|
||||
if (not node.children) and (not node.is_leaf):
|
||||
summary = (
|
||||
'<span class="qubed-node" data-path="" title="Truncated Nodes">...</span>'
|
||||
)
|
||||
summaries.append(summary)
|
||||
|
||||
return ", ".join(summaries), node
|
||||
|
||||
|
||||
def _node_tree_to_html(
|
||||
node: Qube,
|
||||
prefix: str = "",
|
||||
depth=1,
|
||||
connector="",
|
||||
info: Callable[[Qube], str] | None = None,
|
||||
**kwargs,
|
||||
) -> Iterable[str]:
|
||||
summary, node = summarize_node_html(node, info=info, **kwargs)
|
||||
|
||||
if len(node.children) == 0:
|
||||
yield f'<span class="qubed-level">{connector}{summary}</span>'
|
||||
return
|
||||
else:
|
||||
open = "open" if depth > 0 else ""
|
||||
yield f'<details {open}><summary class="qubed-level">{connector}{summary}</summary>'
|
||||
|
||||
for index, child in enumerate(node.children):
|
||||
connector = "└── " if index == len(node.children) - 1 else "├── "
|
||||
extension = " " if index == len(node.children) - 1 else "│ "
|
||||
yield from _node_tree_to_html(
|
||||
child,
|
||||
prefix + extension,
|
||||
depth=depth - 1,
|
||||
connector=prefix + connector,
|
||||
info=info,
|
||||
**kwargs,
|
||||
)
|
||||
yield "</details>"
|
||||
|
||||
|
||||
def node_tree_to_html(
|
||||
node: Qube,
|
||||
depth=1,
|
||||
include_css=True,
|
||||
include_js=True,
|
||||
css_id=None,
|
||||
info: Callable[[Qube], str] | None = None,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
if css_id is None:
|
||||
css_id = f"qubed-tree-{random.randint(0, 1000000)}"
|
||||
|
||||
# It's ugly to use an f-string here because CSS uses {} so much, so instead
|
||||
# we use CSS_ID as a placeholder and replace it later
|
||||
css = """
|
||||
<style>
|
||||
pre#CSS_ID {
|
||||
font-family: monospace;
|
||||
white-space: pre;
|
||||
font-family: SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;
|
||||
font-size: 12px;
|
||||
line-height: 1.4;
|
||||
|
||||
details {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
.qubed-level a {
|
||||
margin-left: 10px;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
summary {
|
||||
list-style: none;
|
||||
cursor: pointer;
|
||||
text-overflow: ellipsis;
|
||||
overflow: hidden;
|
||||
text-wrap: nowrap;
|
||||
display: block;
|
||||
}
|
||||
|
||||
span.qubed-node:hover {
|
||||
background-color: #f0f0f0;
|
||||
}
|
||||
|
||||
details > summary::after {
|
||||
content: ' ▲';
|
||||
}
|
||||
|
||||
details:not([open]) > summary::after {
|
||||
content: " ▼";
|
||||
}
|
||||
|
||||
.qubed-level {
|
||||
text-overflow: ellipsis;
|
||||
overflow: hidden;
|
||||
text-wrap: nowrap;
|
||||
display: block;
|
||||
}
|
||||
|
||||
summary::-webkit-details-marker {
|
||||
display: none;
|
||||
content: "";
|
||||
}
|
||||
|
||||
}
|
||||
</style>
|
||||
""".replace("CSS_ID", css_id)
|
||||
|
||||
# This js snippet copies the path of a node to the clipboard when clicked
|
||||
js = """
|
||||
<script type="module" defer>
|
||||
async function nodeOnClick(event) {
|
||||
if (!event.altKey) return;
|
||||
event.preventDefault();
|
||||
let current_element = this.parentElement;
|
||||
let paths = [];
|
||||
while (true) {
|
||||
if (current_element.dataset.path) {
|
||||
paths.push(current_element.dataset.path);
|
||||
}
|
||||
current_element = current_element.parentElement;
|
||||
if (current_element.tagName == "PRE") break;
|
||||
}
|
||||
const path = paths.reverse().slice(1).join(",");
|
||||
await navigator.clipboard.writeText(path);
|
||||
}
|
||||
|
||||
const nodes = document.querySelectorAll("#CSS_ID .qubed-node");
|
||||
nodes.forEach(n => n.addEventListener("click", nodeOnClick));
|
||||
</script>
|
||||
""".replace("CSS_ID", css_id)
|
||||
nodes = "".join(_node_tree_to_html(node=node, depth=depth, info=info, **kwargs))
|
||||
return f"{js if include_js else ''}{css if include_css else ''}<pre class='qubed-tree' id='{css_id}'>{nodes}</pre>"
|
||||
|
||||
|
||||
def _display(qube: Qube, **kwargs):
|
||||
if display is None:
|
||||
print(qube)
|
||||
else:
|
||||
|
||||
def info(node: Qube):
|
||||
return f"""\
|
||||
structural_hash = {node.structural_hash}
|
||||
metadata = {dict(node.metadata)}
|
||||
is_root = {node.is_root}
|
||||
is_leaf = {node.is_leaf}
|
||||
"""
|
||||
|
||||
kwargs = {"info": info} | kwargs
|
||||
display(qube.html(**kwargs))
|
40  src/python/qubed/trie.py  Normal file
@@ -0,0 +1,40 @@
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
character = str
|
||||
|
||||
|
||||
@dataclass(unsafe_hash=True)
|
||||
class TrieNode:
|
||||
parent: "TrieNode | None"
|
||||
parent_char: character
|
||||
children: dict[character, "TrieNode"] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Trie:
|
||||
root: TrieNode = field(default_factory=lambda: TrieNode(None, ""))
|
||||
reverse_lookup: dict[int, TrieNode] = field(default_factory=dict)
|
||||
|
||||
def insert(self, word: str):
|
||||
node = self.root
|
||||
for char in word:
|
||||
if char not in node.children:
|
||||
new_node = TrieNode(node, char)
|
||||
node.children[char] = new_node
|
||||
|
||||
node = node.children[char]
|
||||
|
||||
n_id = id(node)
|
||||
if n_id not in self.reverse_lookup:
|
||||
self.reverse_lookup[n_id] = node
|
||||
|
||||
return n_id
|
||||
|
||||
def lookup_by_id(self, n_id: int):
|
||||
leaf_node = self.reverse_lookup[n_id]
|
||||
string = []
|
||||
while leaf_node.parent is not None:
|
||||
string.append(leaf_node.parent_char)
|
||||
leaf_node = leaf_node.parent
|
||||
|
||||
return "".join(reversed(string))
|
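# A minimal usage sketch (hypothetical word):
def _trie_example():
    trie = Trie()
    n_id = trie.insert("class=od")
    assert trie.lookup_by_id(n_id) == "class=od"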
443  src/python/qubed/value_types.py  Normal file
@@ -0,0 +1,443 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime, timedelta
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
FrozenSet,
|
||||
Iterable,
|
||||
Iterator,
|
||||
Literal,
|
||||
Sequence,
|
||||
TypeVar,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .Qube import Qube
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ValueGroup(ABC):
|
||||
@abstractmethod
|
||||
def dtype(self) -> str:
|
||||
"Provide a string rep of the datatype of these values"
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def summary(self) -> str:
|
||||
"Provide a string summary of the value group."
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def __contains__(self, value: Any) -> bool:
|
||||
"Given a value, coerce to the value type and determine if it is in the value group."
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def to_json(self) -> dict:
|
||||
"Return a JSON serializable representation of the value group."
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def min(self):
|
||||
"Return the minimum value in the group."
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def from_strings(cls, values: Iterable[str]) -> Sequence[ValueGroup]:
|
||||
"Given a list of strings, return a one or more ValueGroups of this type."
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def __iter__(self) -> Iterator:
|
||||
"Iterate over the values in the group."
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def __len__(self) -> int:
|
||||
pass
|
||||
|
||||
|
||||
T = TypeVar("T")
|
||||
EnumValuesType = FrozenSet[T]
|
||||
|
||||
_dtype_map: dict[str, type] = {
|
||||
"str": str,
|
||||
"int64": int,
|
||||
"float64": float,
|
||||
"date": datetime,
|
||||
}
|
||||
_dtype_map_inv: dict[type, str] = {v: k for k, v in _dtype_map.items()}
|
||||
_dtype_formatters = {
|
||||
"str": str,
|
||||
"int64": int,
|
||||
"float64": float,
|
||||
"date": datetime.fromisoformat,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True, order=True)
|
||||
class QEnum(ValueGroup):
|
||||
"""
|
||||
The simplest kind of key value is just a list of strings.
|
||||
summary -> string1/string2/string....
|
||||
"""
|
||||
|
||||
values: EnumValuesType
|
||||
_dtype: str = "str"
|
||||
|
||||
def __init__(self, obj, dtype="str"):
|
||||
object.__setattr__(self, "values", tuple(sorted(obj)))
|
||||
object.__setattr__(
|
||||
self,
|
||||
"_dtype",
|
||||
dtype,
|
||||
)
|
||||
|
||||
def __post_init__(self):
|
||||
assert isinstance(self.values, tuple)
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.values)
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.values)
|
||||
|
||||
def summary(self) -> str:
|
||||
return "/".join(map(str, sorted(self.values)))
|
||||
|
||||
def __contains__(self, value: Any) -> bool:
|
||||
return value in self.values
|
||||
|
||||
def dtype(self):
|
||||
return self._dtype
|
||||
|
||||
@classmethod
|
||||
def from_strings(cls, values: Iterable[str]) -> Sequence[ValueGroup]:
|
||||
return [cls(tuple(values))]
|
||||
|
||||
def min(self):
|
||||
return min(self.values)
|
||||
|
||||
def to_json(self):
|
||||
return {"type": "enum", "dtype": self.dtype(), "values": self.values}
|
||||
|
||||
# @classmethod
|
||||
# def from_json(cls, type: Literal["enum"], dtype: str, values: list):
|
||||
# dtype_formatter = _dtype_formatters[dtype]
|
||||
|
||||
@classmethod
|
||||
def from_list(cls, obj):
|
||||
example = obj[0]
|
||||
dtype = type(example)
|
||||
assert all(type(v) is dtype for v in obj)
|
||||
return cls(obj, dtype=_dtype_map_inv[dtype])
|
||||
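# A quick illustration (hypothetical values):
def _qenum_example():
    e = QEnum(["0002", "0001"])
    assert e.summary() == "0001/0002"
    assert "0001" in e and len(e) == 2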
|
||||
|
||||
@dataclass(frozen=True, order=True)
|
||||
class WildcardGroup(ValueGroup):
|
||||
def summary(self) -> str:
|
||||
return "*"
|
||||
|
||||
def __contains__(self, value: Any) -> bool:
|
||||
return True
|
||||
|
||||
def to_json(self):
|
||||
return "*"
|
||||
|
||||
def min(self):
|
||||
return "*"
|
||||
|
||||
def __len__(self):
|
||||
return 1
|
||||
|
||||
def __iter__(self):
|
||||
return ["*"]
|
||||
|
||||
def __bool__(self):
|
||||
return True
|
||||
|
||||
def dtype(self):
|
||||
return "*"
|
||||
|
||||
@classmethod
|
||||
def from_strings(cls, values: Iterable[str]) -> Sequence[ValueGroup]:
|
||||
return [WildcardGroup()]
|
||||
|
||||
|
||||
class DateEnum(QEnum):
|
||||
def summary(self) -> str:
|
||||
def fmt(d):
|
||||
return d.strftime("%Y%m%d")
|
||||
|
||||
return "/".join(map(fmt, sorted(self.values)))
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Range(ValueGroup, ABC):
|
||||
dtype: str = dataclasses.field(kw_only=True)
|
||||
|
||||
start: Any
|
||||
end: Any
|
||||
step: Any
|
||||
|
||||
def min(self):
|
||||
return self.start
|
||||
|
||||
def __iter__(self) -> Iterator[Any]:
|
||||
i = self.start
|
||||
while i <= self.end:
|
||||
yield i
|
||||
i += self.step
|
||||
|
||||
def to_json(self):
|
||||
return dataclasses.asdict(self)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DateRange(Range):
|
||||
start: date
|
||||
end: date
|
||||
step: timedelta
|
||||
dtype: Literal["date"] = dataclasses.field(kw_only=True, default="date")
|
||||
|
||||
def __len__(self) -> int:
|
||||
return (self.end - self.start) // self.step
|
||||
|
||||
def __iter__(self) -> Iterator[date]:
|
||||
current = self.start
|
||||
while current <= self.end if self.step.days > 0 else current >= self.end:
|
||||
yield current
|
||||
current += self.step
|
||||
|
||||
@classmethod
|
||||
def from_strings(cls, values: Iterable[str]) -> Sequence[DateRange | DateEnum]:
|
||||
dates = sorted([datetime.strptime(v, "%Y%m%d") for v in values])
|
||||
if len(dates) < 2:
|
||||
return [DateEnum(dates)]
|
||||
|
||||
ranges: list[DateEnum | DateRange] = []
|
||||
current_group, dates = (
|
||||
[
|
||||
dates[0],
|
||||
],
|
||||
dates[1:],
|
||||
)
|
||||
current_type: Literal["enum", "range"] = "enum"
|
||||
while dates:
|
||||
if current_type == "range":
|
||||
# If the next date fits then add it to the current range
|
||||
if dates[0] - current_group[-1] == timedelta(days=1):
|
||||
current_group.append(dates.pop(0))
|
||||
|
||||
# Emit the current range and start a new one
|
||||
else:
|
||||
if len(current_group) == 1:
|
||||
ranges.append(DateEnum(current_group))
|
||||
else:
|
||||
ranges.append(
|
||||
DateRange(
|
||||
start=current_group[0],
|
||||
end=current_group[-1],
|
||||
step=timedelta(days=1),
|
||||
)
|
||||
)
|
||||
current_group = [
|
||||
dates.pop(0),
|
||||
]
|
||||
current_type = "enum"
|
||||
|
||||
if current_type == "enum":
|
||||
# If the next date is one more than the last then switch to range mode
|
||||
if dates[0] - current_group[-1] == timedelta(days=1):
|
||||
last = current_group.pop()
|
||||
if current_group:
|
||||
ranges.append(DateEnum(current_group))
|
||||
current_group = [last, dates.pop(0)]
|
||||
current_type = "range"
|
||||
|
||||
else:
|
||||
current_group.append(dates.pop(0))
|
||||
|
||||
# Handle remaining `current_group`
|
||||
if current_group:
|
||||
if current_type == "range":
|
||||
ranges.append(
|
||||
DateRange(
|
||||
start=current_group[0],
|
||||
end=current_group[-1],
|
||||
step=timedelta(days=1),
|
||||
)
|
||||
)
|
||||
else:
|
||||
ranges.append(DateEnum(current_group))
|
||||
|
||||
return ranges
|
||||
|
||||
def __contains__(self, value: Any) -> bool:
|
||||
v = datetime.strptime(value, "%Y%m%d").date()
|
||||
return self.start <= v <= self.end and (v - self.start) % self.step == 0
|
||||
|
||||
def summary(self) -> str:
|
||||
def fmt(d):
|
||||
return d.strftime("%Y%m%d")
|
||||
|
||||
if self.step == timedelta(days=0):
|
||||
return f"{fmt(self.start)}"
|
||||
if self.step == timedelta(days=1):
|
||||
return f"{fmt(self.start)}/to/{fmt(self.end)}"
|
||||
|
||||
return (
|
||||
f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step // timedelta(days=1)}"
|
||||
)
|
||||
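# Sketch of how date strings are grouped (hypothetical input): runs of consecutive
# days collapse into a DateRange, while isolated days stay as DateEnum entries.
def _daterange_from_strings_example():
    groups = DateRange.from_strings(["20200101", "20200102", "20200103", "20200110"])
    return [g.summary() for g in groups]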
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TimeRange(Range):
|
||||
start: int
|
||||
end: int
|
||||
step: int
|
||||
dtype: Literal["time"] = dataclasses.field(kw_only=True, default="time")
|
||||
|
||||
def min(self):
|
||||
return self.start
|
||||
|
||||
def __iter__(self) -> Iterator[Any]:
|
||||
return super().__iter__()
|
||||
|
||||
@classmethod
|
||||
def from_strings(self, values: Iterable[str]) -> list["TimeRange"]:
|
||||
times = sorted([int(v) for v in values])
|
||||
if len(times) < 2:
|
||||
return [TimeRange(start=times[0], end=times[0], step=100)]
|
||||
|
||||
ranges = []
|
||||
current_range, times = (
|
||||
[
|
||||
times[0],
|
||||
],
|
||||
times[1:],
|
||||
)
|
||||
while len(times) > 1:
|
||||
if times[0] - current_range[-1] == 1:
|
||||
current_range.append(times.pop(0))
|
||||
|
||||
elif len(current_range) == 1:
|
||||
ranges.append(
|
||||
TimeRange(start=current_range[0], end=current_range[0], step=0)
|
||||
)
|
||||
current_range = [
|
||||
times.pop(0),
|
||||
]
|
||||
|
||||
else:
|
||||
ranges.append(
|
||||
TimeRange(start=current_range[0], end=current_range[-1], step=1)
|
||||
)
|
||||
current_range = [
|
||||
times.pop(0),
|
||||
]
|
||||
return ranges
|
||||
|
||||
def __len__(self) -> int:
|
||||
return (self.end - self.start) // self.step
|
||||
|
||||
def summary(self) -> str:
|
||||
def fmt(d):
|
||||
return f"{d:04d}"
|
||||
|
||||
if self.step == 0:
|
||||
return f"{fmt(self.start)}"
|
||||
return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
|
||||
|
||||
def __contains__(self, value: Any) -> bool:
|
||||
v = int(value)
|
||||
return self.start <= v <= self.end and (v - self.start) % self.step == 0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class IntRange(Range):
|
||||
start: int
|
||||
end: int
|
||||
step: int
|
||||
dtype: Literal["int"] = dataclasses.field(kw_only=True, default="int")
|
||||
|
||||
def __len__(self) -> int:
|
||||
return (self.end - self.start) // self.step
|
||||
|
||||
def summary(self) -> str:
|
||||
def fmt(d):
|
||||
return d
|
||||
|
||||
if self.step == 0:
|
||||
return f"{fmt(self.start)}"
|
||||
return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
|
||||
|
||||
def __contains__(self, value: Any) -> bool:
|
||||
v = int(value)
|
||||
return self.start <= v <= self.end and (v - self.start) % self.step == 0
|
||||
|
||||
@classmethod
|
||||
def from_strings(self, values: Iterable[str]) -> list["IntRange"]:
|
||||
ints = sorted([int(v) for v in values])
|
||||
if len(ints) < 2:
|
||||
return [IntRange(start=ints[0], end=ints[0], step=0)]
|
||||
|
||||
ranges = []
|
||||
current_range, ints = (
|
||||
[
|
||||
ints[0],
|
||||
],
|
||||
ints[1:],
|
||||
)
|
||||
while len(ints) > 1:
|
||||
if ints[0] - current_range[-1] == 1:
|
||||
current_range.append(ints.pop(0))
|
||||
|
||||
elif len(current_range) == 1:
|
||||
ranges.append(
|
||||
IntRange(start=current_range[0], end=current_range[0], step=0)
|
||||
)
|
||||
current_range = [
|
||||
ints.pop(0),
|
||||
]
|
||||
|
||||
else:
|
||||
ranges.append(
|
||||
IntRange(start=current_range[0], end=current_range[-1], step=1)
|
||||
)
|
||||
current_range = [
|
||||
ints.pop(0),
|
||||
]
|
||||
return ranges
|
||||
|
||||
|
||||
def values_from_json(obj: dict | list) -> ValueGroup:
|
||||
if isinstance(obj, list):
|
||||
return QEnum.from_list(obj)
|
||||
|
||||
match obj["type"]:
|
||||
case "enum":
|
||||
QEnum.from_json(**obj)
|
||||
case _:
|
||||
raise ValueError(f"Unknown dtype {obj['dtype']}")
|
||||
|
||||
|
||||
def convert_datatypes(q: "Qube", conversions: dict[str, ValueGroup]) -> "Qube":
|
||||
def _convert(q: "Qube") -> Iterator["Qube"]:
|
||||
if q.key in conversions:
|
||||
data_type = conversions[q.key]
|
||||
assert isinstance(q.values, QEnum), (
|
||||
"Only QEnum values can be converted to other datatypes."
|
||||
)
|
||||
for values_group in data_type.from_strings(q.values):
|
||||
# print(values_group)
|
||||
yield q.replace(values=values_group)
|
||||
else:
|
||||
yield q
|
||||
|
||||
return q.transform(_convert)
|
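# Usage sketch (hypothetical qube and key): convert the string values stored under
# the "date" key into DateRange/DateEnum groups.
#     q2 = convert_datatypes(q, {"date": DateRange})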
32  src/qube.proto  Normal file
@@ -0,0 +1,32 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message NdArray {
|
||||
repeated int64 shape = 1;
|
||||
string dtype = 2;
|
||||
bytes raw = 3;
|
||||
}
|
||||
|
||||
message StringGroup {repeated string items = 1; }
|
||||
|
||||
// Stores values i.e class=1/2/3 the 1/2/3 part
|
||||
message ValueGroup {
|
||||
oneof payload {
|
||||
StringGroup s = 1;
|
||||
NdArray tensor = 2;
|
||||
}
|
||||
}
|
||||
|
||||
message MetadataGroup {
|
||||
oneof payload {
|
||||
NdArray tensor = 1;
|
||||
}
|
||||
}
|
||||
|
||||
message Qube {
|
||||
string key = 1;
|
||||
ValueGroup values = 2;
|
||||
map<string, MetadataGroup> metadata = 3;
|
||||
string dtype = 4;
|
||||
repeated Qube children = 5;
|
||||
bool is_root = 6;
|
||||
}
|
76  src/rust/connectors/fdb.rs  Normal file
@@ -0,0 +1,76 @@
|
||||
use rsfdb::listiterator::KeyValueLevel;
|
||||
use rsfdb::request::Request;
|
||||
use rsfdb::FDB;
|
||||
|
||||
use serde_json::{json, Value};
|
||||
use std::time::Instant;
|
||||
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
pub mod tree;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use tree::TreeNode;
|
||||
|
||||
#[pyclass(unsendable)]
|
||||
pub struct PyFDB {
|
||||
pub fdb: FDB,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl PyFDB {
|
||||
#[new]
|
||||
#[pyo3(signature = (fdb_config=None))]
|
||||
pub fn new(fdb_config: Option<&str>) -> PyResult<Self> {
|
||||
let fdb = FDB::new(fdb_config)
|
||||
.map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(e.to_string()))?;
|
||||
Ok(PyFDB { fdb })
|
||||
}
|
||||
|
||||
/// Traverse the FDB with the given request.
|
||||
pub fn traverse_fdb(
|
||||
&self,
|
||||
py: Python<'_>,
|
||||
request: HashMap<String, Vec<String>>,
|
||||
) -> PyResult<PyObject> {
|
||||
let start_time = Instant::now();
|
||||
|
||||
let list_request = Request::from_json(json!(request))
|
||||
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))?;
|
||||
|
||||
// Use `fdb_guard` instead of `self.fdb`
|
||||
let list = self
|
||||
.fdb
|
||||
.list(&list_request, true, true)
|
||||
.map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(e.to_string()))?;
|
||||
|
||||
let mut root = TreeNode::new(KeyValueLevel {
|
||||
key: "root".to_string(),
|
||||
value: "root".to_string(),
|
||||
level: 0,
|
||||
});
|
||||
|
||||
for item in list {
|
||||
py.check_signals()?;
|
||||
|
||||
if let Some(request) = &item.request {
|
||||
root.insert(&request);
|
||||
}
|
||||
}
|
||||
|
||||
let duration = start_time.elapsed();
|
||||
println!("Total runtime: {:?}", duration);
|
||||
|
||||
let py_dict = root.to_py_dict(py)?;
|
||||
Ok(py_dict)
|
||||
}
|
||||
}
|
||||
|
||||
use pyo3::prelude::*;
|
||||
|
||||
#[pymodule]
|
||||
fn rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_class::<PyFDB>()?;
|
||||
Ok(())
|
||||
}
|
147  src/rust/formatters/mod.rs  Normal file
@@ -0,0 +1,147 @@
|
||||
use crate::{Node, NodeId, Qube};
|
||||
use itertools::Itertools;
|
||||
use itertools::Position;
|
||||
|
||||
impl Node {
|
||||
/// Generate a human readable summary of the node
|
||||
/// Examples include: key=value1/value2/.../valueN, key=value1/to/value1, key=*, root etc
|
||||
pub fn summary(&self, qube: &Qube) -> String {
|
||||
if self.is_root() {
|
||||
return "root".to_string();
|
||||
}
|
||||
let key = &qube[self.key];
|
||||
let values: String =
|
||||
Itertools::intersperse(self.values.iter().map(|id| &qube[*id]), "/").collect();
|
||||
|
||||
format!("{}={}", key, values)
|
||||
}
|
||||
|
||||
pub fn html_summary(&self, qube: &Qube) -> String {
|
||||
if self.is_root() {
|
||||
return r#"<span class="qubed-node">root</span>"#.to_string();
|
||||
}
|
||||
let key = &qube[self.key];
|
||||
let values: String =
|
||||
Itertools::intersperse(self.values.iter().map(|id| &qube[*id]), "/").collect();
|
||||
|
||||
let summary = format!("{}={}", key, values);
|
||||
let path = summary.clone();
|
||||
let info = format!("is_root: {}", self.is_root());
|
||||
format!(r#"<span class="qubed-node" data-path="{path}" title="{info}">{summary}</span>"#)
|
||||
}
|
||||
}
|
||||
|
||||
struct NodeSummary {
|
||||
summary: String,
|
||||
end: NodeId,
|
||||
}
|
||||
|
||||
enum SummaryType {
|
||||
PlainText,
|
||||
HTML,
|
||||
}
|
||||
|
||||
/// Given a Node, traverse the tree until a node has more than one child.
|
||||
/// Returns a summary of the form "key1=v1/v2, key2=v1/v2/v3, key3=v1"
|
||||
/// and the id of the last node in the summary
|
||||
fn summarise_nodes(qube: &Qube, node_id: &NodeId, summary_type: SummaryType) -> NodeSummary {
|
||||
let mut node_id = *node_id;
|
||||
let mut summary_vec = vec![];
|
||||
loop {
|
||||
let node = &qube[node_id];
|
||||
let summary = match summary_type {
|
||||
SummaryType::PlainText => node.summary(&qube),
|
||||
SummaryType::HTML => node.html_summary(&qube),
|
||||
};
|
||||
summary_vec.push(summary);
|
||||
|
||||
// Bail out if the node has anything other than exactly one child.
|
||||
match node.has_exactly_one_child() {
|
||||
Some(n) => node_id = n,
|
||||
None => break,
|
||||
};
|
||||
}
|
||||
NodeSummary {
|
||||
summary: summary_vec.join(", "),
|
||||
end: node_id,
|
||||
}
|
||||
}
|
||||
|
||||
fn qube_to_tree(qube: &Qube, node_id: &NodeId, prefix: &str, depth: usize) -> String {
|
||||
let NodeSummary {
|
||||
summary,
|
||||
end: node_id,
|
||||
} = summarise_nodes(qube, node_id, SummaryType::PlainText);
|
||||
|
||||
let mut output: Vec<String> = Vec::new();
|
||||
|
||||
if depth <= 0 {
|
||||
return format!("{} - ...\n", summary);
|
||||
} else {
|
||||
output.push(format!("{}\n", summary));
|
||||
}
|
||||
|
||||
let node = &qube[node_id];
|
||||
for (position, child_id) in node.children().with_position() {
|
||||
let (connector, extension) = match position {
|
||||
Position::Last | Position::Only => ("└── ", " "),
|
||||
_ => ("├── ", "│ "),
|
||||
};
|
||||
output.extend([
|
||||
prefix.to_string(),
|
||||
connector.to_string(),
|
||||
qube_to_tree(qube, child_id, &format!("{prefix}{extension}"), depth - 1),
|
||||
]);
|
||||
}
|
||||
|
||||
output.join("")
|
||||
}
|
||||
|
||||
fn qube_to_html(qube: &Qube, node_id: &NodeId, prefix: &str, depth: usize) -> String {
|
||||
let NodeSummary {
|
||||
summary,
|
||||
end: node_id,
|
||||
} = summarise_nodes(qube, node_id, SummaryType::PlainText);
|
||||
|
||||
let node = &qube[node_id];
|
||||
let mut output: Vec<String> = Vec::new();
|
||||
|
||||
let open = if depth > 0 { "open" } else { "" };
|
||||
output.push(format!(
|
||||
r#"<details {open}><summary class="qubed-level">{summary}</summary>"#
|
||||
));
|
||||
|
||||
for (position, child_id) in node.children().with_position() {
|
||||
let (connector, extension) = match position {
|
||||
Position::Last | Position::Only => ("└── ", " "),
|
||||
_ => ("├── ", "│ "),
|
||||
};
|
||||
output.extend([
|
||||
prefix.to_string(),
|
||||
connector.to_string(),
|
||||
qube_to_tree(qube, child_id, &format!("{prefix}{extension}"), depth - 1),
|
||||
]);
|
||||
}
|
||||
|
||||
output.join("")
|
||||
}
|
||||
|
||||
impl Qube {
|
||||
/// Return a string version of the Qube in the format
|
||||
/// root
|
||||
/// ├── class=od, expver=0001/0002, param=1/2
|
||||
/// └── class=rd, param=1/2/3
|
||||
pub fn string_tree(&self) -> String {
|
||||
qube_to_tree(&self, &self.root, "", 5)
|
||||
}
|
||||
|
||||
/// Return an HTML version of the Qube which renders like this
|
||||
/// root
|
||||
/// ├── class=od, expver=0001/0002, param=1/2
|
||||
/// └── class=rd, param=1/2/3
|
||||
/// But under the hood children are represented with a details/summary tag and each key=value is a span
|
||||
/// CSS and JS functionality is bundled inside.
|
||||
pub fn html_tree(&self) -> String {
|
||||
qube_to_html(&self, &self.root, "", 5)
|
||||
}
|
||||
}
|
235  src/rust/lib.rs  Normal file
@@ -0,0 +1,235 @@
|
||||
#![allow(unused_imports)]
|
||||
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::wrap_pyfunction;
|
||||
use pyo3::types::{PyDict, PyInt, PyList, PyString};
|
||||
use python_interface::QubeError;
|
||||
use std::collections::HashMap;
|
||||
use std::iter;
|
||||
use pyo3::prelude::*;
|
||||
use std::hash::Hash;
|
||||
use std::rc::Rc;
|
||||
|
||||
use lasso::{Rodeo, Spur};
|
||||
use std::num::NonZero;
|
||||
use std::ops;
|
||||
|
||||
mod serialisation;
|
||||
mod python_interface;
|
||||
mod formatters;
|
||||
mod set_operations;
|
||||
|
||||
// This data structure uses the Newtype Index Pattern
|
||||
// See https://matklad.github.io/2018/06/04/newtype-index-pattern.html
|
||||
// See also https://github.com/nrc/r4cppp/blob/master/graphs/README.md#rcrefcellnode for a discussion of other approaches to trees and graphs in rust.
|
||||
// https://smallcultfollowing.com/babysteps/blog/2015/04/06/modeling-graphs-in-rust-using-vector-indices/
|
||||
|
||||
// Index types use struct Id(NonZero<usize>)
|
||||
// This reserves 0 as a special value which allows Option<Id(NonZero<usize>)> to be the same size as usize.
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
|
||||
pub(crate) struct NodeId(NonZero<usize>);
|
||||
|
||||
// Allow node indices to index directly into Qubes:
|
||||
impl ops::Index<NodeId> for Qube {
|
||||
type Output = Node;
|
||||
|
||||
fn index(&self, index: NodeId) -> &Node {
|
||||
&self.nodes[index.0.get() - 1]
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::IndexMut<NodeId> for Qube {
|
||||
fn index_mut(&mut self, index: NodeId) -> &mut Node {
|
||||
&mut self.nodes[index.0.get() - 1]
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Index<StringId> for Qube {
|
||||
type Output = str;
|
||||
|
||||
fn index(&self, index: StringId) -> &str {
|
||||
&self.strings[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl NodeId {
|
||||
pub fn new(value: usize) -> Option<NodeId> {
|
||||
NonZero::new(value).map(NodeId)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
|
||||
struct StringId(lasso::Spur);
|
||||
|
||||
impl ops::Index<StringId> for lasso::Rodeo {
|
||||
type Output = str;
|
||||
|
||||
fn index(&self, index: StringId) -> &str {
|
||||
&self[index.0]
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct Node {
|
||||
pub key: StringId,
|
||||
pub metadata: HashMap<StringId, Vec<String>>,
|
||||
pub parent: Option<NodeId>, // If not present, it's the root node
|
||||
pub values: Vec<StringId>,
|
||||
pub children: HashMap<StringId, Vec<NodeId>>,
|
||||
}
|
||||
|
||||
impl Node {
|
||||
fn new_root(q: &mut Qube) -> Node {
|
||||
Node {
|
||||
key: q.get_or_intern("root"),
|
||||
metadata: HashMap::new(),
|
||||
parent: None,
|
||||
values: vec![],
|
||||
children: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn children(&self) -> impl Iterator<Item = &NodeId> {
|
||||
self.children.values().flatten()
|
||||
}
|
||||
|
||||
fn is_root(&self) -> bool {
|
||||
self.parent.is_none()
|
||||
}
|
||||
|
||||
/// Because children are stored grouped by key
|
||||
/// determining the number of children quickly takes a little effort.
|
||||
/// This is a fast method for the special case of checking if a Node has exactly one child.
|
||||
/// Returns Some(NodeId) if there is exactly one child, else None
|
||||
fn has_exactly_one_child(&self) -> Option<NodeId> {
|
||||
if self.children.len() != 1 {return None}
|
||||
let Some(value_group) = self.children.values().next() else {return None};
|
||||
let [node_id] = &value_group.as_slice() else {return None};
|
||||
Some(*node_id)
|
||||
}
|
||||
|
||||
fn n_children(&self) -> usize {
|
||||
self.children
|
||||
.values()
|
||||
.map(|v| v.len())
|
||||
.sum()
|
||||
}
|
||||
|
||||
fn keys<'a>(&'a self, q: &'a Qube) -> impl Iterator<Item = &'a str> {
|
||||
self.children.keys()
|
||||
.map(|s| {&q[*s]})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[pyclass(subclass, dict)]
|
||||
pub struct Qube {
|
||||
pub root: NodeId,
|
||||
nodes: Vec<Node>,
|
||||
strings: Rodeo,
|
||||
}
|
||||
|
||||
impl Qube {
|
||||
pub fn new() -> Self {
|
||||
let mut q = Self {
|
||||
root: NodeId::new(1).unwrap(),
|
||||
nodes: Vec::new(),
|
||||
strings: Rodeo::default(),
|
||||
};
|
||||
|
||||
let root = Node::new_root(&mut q);
|
||||
q.nodes.push(root);
|
||||
q
|
||||
}
|
||||
|
||||
fn get_or_intern(&mut self, val: &str) -> StringId {
|
||||
StringId(self.strings.get_or_intern(val))
|
||||
}
|
||||
|
||||
pub(crate) fn add_node(&mut self, parent: NodeId, key: &str, values: impl IntoIterator<Item = impl AsRef<str>>) -> NodeId {
|
||||
let key_id = self.get_or_intern(key);
|
||||
let values = values.into_iter().map(|val| self.get_or_intern(val.as_ref())).collect();
|
||||
|
||||
// Create the node object
|
||||
let node = Node {
|
||||
key: key_id,
|
||||
metadata: HashMap::new(),
|
||||
values: values,
|
||||
parent: Some(parent),
|
||||
children: HashMap::new(),
|
||||
};
|
||||
|
||||
// Insert it into the Qube arena and determine its id
|
||||
self.nodes.push(node);
|
||||
let node_id = NodeId::new(self.nodes.len()).unwrap();
|
||||
|
||||
// Add a reference to this node's id to the parents list of children.
|
||||
let parent_node = &mut self[parent];
|
||||
let key_group = parent_node.children.entry(key_id).or_insert(Vec::new());
|
||||
key_group.push(node_id);
|
||||
|
||||
node_id
|
||||
}
|
||||
|
||||
fn print(&self, node_id: Option<NodeId>) -> String {
|
||||
let node_id: NodeId = node_id.unwrap_or(self.root);
|
||||
let node = &self[node_id];
|
||||
node.summary(&self)
|
||||
}
|
||||
|
||||
fn get_node_ref(&self, id: NodeId) -> NodeRef {
|
||||
let node = &self[id];
|
||||
NodeRef { id: id, node: &node, qube: &self }
|
||||
}
|
||||
|
||||
pub fn get_string_id(&self, s: &str) -> Option<StringId> {
|
||||
self.strings.get(s)
|
||||
.map(|id| StringId(id))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[pymodule]
|
||||
fn rust(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_class::<Qube>()?;
|
||||
m.add("QubeError", py.get_type::<python_interface::QubeError>())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
pub struct NodeRef<'a> {
|
||||
pub id: NodeId,
|
||||
pub node: &'a Node,
|
||||
pub qube: &'a Qube,
|
||||
}
|
||||
|
||||
impl<'a> NodeRef<'a> {
|
||||
pub fn keys(&self) -> impl Iterator<Item = &str> {
|
||||
self.node.keys(self.qube)
|
||||
}
|
||||
|
||||
fn flat_children(&'a self) -> impl Iterator<Item = Self> {
|
||||
self.node.children
|
||||
.values()
|
||||
.flatten()
|
||||
.map(|id| {
|
||||
NodeRef { id: *id, node: &self.qube[*id], qube: self.qube }
|
||||
})
|
||||
}
|
||||
|
||||
fn children_by_key(&'a self, key: &str) -> impl Iterator<Item = Self> {
|
||||
let id = self.qube.get_string_id(key);
|
||||
let children = id
|
||||
.map(|i| self.node.children.get(&i))
|
||||
.flatten();
|
||||
|
||||
children.map(
|
||||
|ids| ids.into_iter().map(
|
||||
|id| {
|
||||
NodeRef { id: *id, node: &self.qube[*id], qube: self.qube }
|
||||
})).into_iter().flatten()
|
||||
}
|
||||
|
||||
|
||||
}
|
179  src/rust/python_interface.rs  Normal file
@@ -0,0 +1,179 @@
|
||||
use crate::{Node, NodeId, Qube, NodeRef};
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::{PyList, PyType};
|
||||
use core::borrow;
|
||||
use std::ops::Deref;
|
||||
use std::cell::Ref;
|
||||
|
||||
use crate::set_operations;
|
||||
use crate::serialisation;
|
||||
use itertools::Itertools;
|
||||
|
||||
use pyo3::create_exception;
|
||||
|
||||
create_exception!(qubed, QubeError, pyo3::exceptions::PyException);
|
||||
|
||||
/// A reference to a particular node in a Qube
|
||||
#[pyclass]
|
||||
pub struct PyNodeRef {
|
||||
id: NodeId,
|
||||
qube: Py<Qube>, // see https://pyo3.rs/v0.23.1/types for a discussion of Py<T> and Bound<'py, T>
|
||||
}
|
||||
|
||||
fn into_py_node_ref(node_ref: NodeRef, qube: Py<Qube>) -> PyNodeRef {
|
||||
PyNodeRef {
|
||||
id: node_ref.id,
|
||||
qube: qube,
|
||||
}
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl PyNodeRef {
|
||||
fn __repr__(&self, py: Python) -> PyResult<String> {
|
||||
// Get the Py<Qube> reference, bind it to the GIL.
|
||||
let qube = self.qube.bind(py);
|
||||
|
||||
fn repr_helper<'py>(node_id: NodeId, qube: &Bound<'py, Qube>) -> String {
|
||||
let node = &qube.borrow()[node_id];
|
||||
let key = &qube.borrow()[node.key];
|
||||
let children = node
|
||||
.children
|
||||
.values()
|
||||
.flatten()
|
||||
.map(|child_id| repr_helper(child_id.clone(), qube))
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ");
|
||||
|
||||
format!("Node({}, {})", key, children)
|
||||
}
|
||||
|
||||
Ok(repr_helper(self.id, qube))
|
||||
}
|
||||
|
||||
fn __str__(&self, py: Python) -> String {
|
||||
let qube = self.qube.bind(py).borrow();
|
||||
let node = &qube[self.id];
|
||||
let key = &qube.strings[node.key];
|
||||
format!("Node({})", key)
|
||||
}
|
||||
|
||||
#[getter]
|
||||
pub fn get_children(&self, py: Python) -> Vec<Self> {
|
||||
let qube = self.qube.bind(py).borrow();
|
||||
let node = &qube[self.id];
|
||||
node.children
|
||||
.values()
|
||||
.flatten()
|
||||
.map(|child_id| Self {
|
||||
id: *child_id,
|
||||
qube: self.qube.clone_ref(py),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(FromPyObject)]
|
||||
pub enum OneOrMany<T> {
|
||||
One(T),
|
||||
Many(Vec<T>),
|
||||
}
|
||||
|
||||
// Todo: Is there a way to rewrite this so that is doesn't allocate?
|
||||
// Perhaps by returning an iterator?
|
||||
impl<T> Into<Vec<T>> for OneOrMany<T> {
|
||||
fn into(self) -> Vec<T> {
|
||||
match self {
|
||||
OneOrMany::One(v) => vec![v],
|
||||
OneOrMany::Many(vs) => vs,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl Qube {
|
||||
#[new]
|
||||
pub fn py_new() -> Self {
|
||||
Qube::new()
|
||||
}
|
||||
|
||||
#[pyo3(name = "add_node")]
|
||||
pub fn py_add_node(
|
||||
slf: Bound<'_, Self>,
|
||||
parent: PyRef<'_, PyNodeRef>,
|
||||
key: &str,
|
||||
values: OneOrMany<String>,
|
||||
) -> PyResult<PyNodeRef> {
|
||||
// Check that the given parent is actually in this qube and not another one
|
||||
if !parent.qube.bind(slf.py()).is(&slf) {
|
||||
return Err(QubeError::new_err("Supplied parent node is not in the target qube."))
|
||||
}
|
||||
|
||||
// massage values from T | Vec<T> into Vec<T>
|
||||
let values: Vec<String> = values.into();
|
||||
let mut q = slf.borrow_mut();
|
||||
let node_id = q.add_node(parent.id, key, &values);
|
||||
Ok(PyNodeRef { id: node_id, qube: slf.into()})
|
||||
}
|
||||
|
||||
pub fn set_root(
|
||||
slf: Bound<'_, Self>,
|
||||
node: PyRef<'_, PyNodeRef>,
|
||||
) -> () {
|
||||
let mut q = slf.borrow_mut();
|
||||
q.root = node.id;
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn get_root(slf: Bound<'_, Self>) -> PyResult<PyNodeRef> {
|
||||
Ok(PyNodeRef {
|
||||
id: slf.borrow().root,
|
||||
qube: slf.unbind(),
|
||||
})
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
// format!("{:?}", self)
|
||||
let nodes_str: String = self.nodes.iter()
|
||||
.enumerate()
|
||||
.map(|(id, node)| {
|
||||
format!("{{id: {}, key: {}, values: [{}], children: [{}]}}",
|
||||
id+1,
|
||||
&self[node.key],
|
||||
node.values.iter().map(|s| &self[*s]).join(", "),
|
||||
node.children().map(|n| n.0).join(", "),
|
||||
)
|
||||
}).join(", ");
|
||||
format!("Qube {{root: {}, nodes: {}}}", self.root.0, nodes_str)
|
||||
}
|
||||
|
||||
fn __str__<'py>(&self) -> String {
|
||||
self.string_tree()
|
||||
}
|
||||
|
||||
fn _repr_html_(&self) -> String {
|
||||
self.html_tree()
|
||||
}
|
||||
|
||||
#[pyo3(name = "print")]
|
||||
fn py_print(&self) -> String {
|
||||
self.print(Option::None)
|
||||
}
|
||||
|
||||
#[getter]
|
||||
pub fn get_children(slf: Bound<'_, Self>, py: Python) -> PyResult<Vec<PyNodeRef>> {
|
||||
let root = PyNodeRef {
|
||||
id: slf.borrow().root,
|
||||
qube: slf.unbind(),
|
||||
};
|
||||
Ok(root.get_children(py))
|
||||
}
|
||||
|
||||
#[staticmethod]
|
||||
pub fn from_json(data: &str) -> Result<Self, serialisation::JSONError> {
|
||||
serialisation::from_json(data)
|
||||
}
|
||||
|
||||
pub fn __or__(slf: Bound<'_, Self>, other: Bound<'_, Qube>) -> Qube {
|
||||
set_operations::set_operation(&slf.borrow(), &other.borrow(), set_operations::Op::Union)
|
||||
}
|
||||
}
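Taken together, these `#[pymethods]` define the Python surface of the Rust `Qube`. A short sketch of how they might be driven from Python, modelled on `test_scripts/rust.py` further down; the module path `qubed.rust` and the presence of `string_tree()` behind `__str__` are assumptions, not confirmed by this diff:

```python
# Sketch, not part of the diff: exercising the PyO3 bindings defined above.
from qubed.rust import Qube  # assumed import path for the compiled extension

q = Qube()                                  # __new__ -> Qube::new()

# add_node takes a parent node reference, a key, and one value or a list of values
a = q.add_node(q.root, "class", "d1")
b = q.add_node(a, "dataset", ["climate-dt", "extremes-dt"])

print(repr(q))    # flat dump of the arena nodes (see __repr__ above)
print(q)          # __str__, assumed to delegate to string_tree()
print(q.children) # list of node references wrapping the root's children
```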
|
80
src/rust/serialisation/json.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
use pyo3::exceptions::PyValueError;
|
||||
use pyo3::prelude::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::{Node, NodeId, Qube};
|
||||
|
||||
// Use a newtype wrapper to allow us to implement auto conversion from serde_json::Error to PyErr
|
||||
// via a wrapper intermediate
|
||||
// see https://pyo3.rs/main/function/error-handling.html#foreign-rust-error-types
|
||||
pub struct JSONError(serde_json::Error);
|
||||
|
||||
impl From<JSONError> for PyErr {
|
||||
fn from(error: JSONError) -> Self {
|
||||
PyValueError::new_err(format!("{}", error.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Error> for JSONError {
|
||||
fn from(other: serde_json::Error) -> Self {
|
||||
Self(other)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(tag = "dtype")]
|
||||
enum Ranges {
|
||||
Int64{values: Vec<(i64, i64)>}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(tag = "dtype", rename_all = "lowercase")]
|
||||
enum Enum {
|
||||
Str{values: Vec<String>}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(tag = "type", rename_all = "lowercase")]
|
||||
enum Values {
|
||||
Wildcard{},
|
||||
Enum(Enum),
|
||||
Range(Ranges)
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct JSONQube {
|
||||
key: String,
|
||||
values: Values,
|
||||
metadata: HashMap<String, String>,
|
||||
children: Vec<JSONQube>,
|
||||
}
|
||||
|
||||
fn add_nodes(qube: &mut Qube, parent: NodeId, nodes: &[JSONQube]) -> Vec<NodeId> {
|
||||
nodes
|
||||
.iter()
|
||||
.map(|json_node| {
|
||||
let values = match &json_node.values {
|
||||
Values::Wildcard{} => &vec!["*"],
|
||||
Values::Enum(Enum::Str{values}) => &values.iter().map(|s| s.as_str()).collect(),
|
||||
Values::Range(_) => todo!(),
|
||||
};
|
||||
let node_id = qube.add_node(parent, &json_node.key, values);
|
||||
|
||||
//
|
||||
add_nodes(qube, node_id, &json_node.children);
|
||||
node_id
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn from_json(data: &str) -> Result<Qube, JSONError> {
|
||||
// Parse the string of data into serde_json::Value.
|
||||
let json_qube: JSONQube = serde_json::from_str(data).expect("JSON parsing failed");
|
||||
|
||||
let mut qube = Qube::new();
|
||||
let root = qube.root;
|
||||
add_nodes(&mut qube, root, &json_qube.children);
|
||||
Ok(qube)
|
||||
}
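For reference, a rough sketch of the JSON node shape implied by the `JSONQube`, `Values` and `Enum` definitions above (internally tagged `type` and `dtype` fields). The exact layout is inferred from the serde attributes and is an assumption, not confirmed elsewhere in the diff:

```python
# Sketch, not part of the diff: the node layout implied by JSONQube above.
import json

node = {
    "key": "class",
    "values": {"type": "enum", "dtype": "str", "values": ["d1"]},
    "metadata": {},
    "children": [
        {
            "key": "dataset",
            # Values::Wildcard{} carries no payload, only the tag
            "values": {"type": "wildcard"},
            "metadata": {},
            "children": [],
        }
    ],
}
print(json.dumps(node, indent=2))  # something from_json should, by this reading, parse
```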
|
2
src/rust/serialisation/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
mod json;
|
||||
pub use json::{from_json, JSONError};
|
40
src/rust/set_operations.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
use crate::NodeRef;
|
||||
use crate::{Node, NodeId, Qube};
|
||||
use itertools::chain;
|
||||
use std::collections::HashSet;
|
||||
|
||||
pub enum Op {
|
||||
Union,
|
||||
Intersection,
|
||||
Difference,
|
||||
SymmetricDifference,
|
||||
}
|
||||
|
||||
fn op_to_venn_diagram(op: Op) -> (bool, bool, bool) {
|
||||
use Op::*;
|
||||
match op {
|
||||
Union => (true, true, true),
|
||||
Intersection => (false, true, false),
|
||||
Difference => (true, false, false),
|
||||
SymmetricDifference => (true, false, true),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_operation<'a>(a: &'a Qube, b: &'a Qube, op: Op) -> Qube {
|
||||
todo!()
|
||||
// _set_operation(a.root_ref(), a.root_ref(), op)
|
||||
}
|
||||
|
||||
// fn _set_operation<'a>(a: NodeRef, b: NodeRef, op: Op) -> Qube {
|
||||
// let keys: HashSet<&str> = HashSet::from_iter(chain(a.keys(), b.keys()));
|
||||
|
||||
// for key in keys {
|
||||
// let a = a.children_by_key(key)
|
||||
// }
|
||||
|
||||
// todo!()
|
||||
// }
|
||||
|
||||
pub fn set_operation_inplace<'a>(a: &'a mut Qube, b: &'a Qube, op: Op) -> &'a Qube {
|
||||
a
|
||||
}
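The `(bool, bool, bool)` triples returned by `op_to_venn_diagram` encode which regions of a two-set Venn diagram an operation keeps: only-A, the intersection, and only-B. The same encoding applied to plain Python sets, as a quick illustration of the idea (the Rust set operation itself is still a `todo!()` above):

```python
# Sketch: the "Venn diagram" encoding of set operations used above.
def venn_select(a: set, b: set, regions: tuple[bool, bool, bool]) -> set:
    only_a, both, only_b = regions
    out = set()
    if only_a:
        out |= a - b
    if both:
        out |= a & b
    if only_b:
        out |= b - a
    return out

OPS = {
    "union": (True, True, True),
    "intersection": (False, True, False),
    "difference": (True, False, False),
    "symmetric_difference": (True, False, True),
}

assert venn_select({1, 2}, {2, 3}, OPS["union"]) == {1, 2, 3}
assert venn_select({1, 2}, {2, 3}, OPS["symmetric_difference"]) == {1, 3}
```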
|
BIN
stac_server/favicon.ico
Normal file
Binary file not shown. (new file, 15 KiB)
376
stac_server/main.py
Normal file
@@ -0,0 +1,376 @@
|
||||
import json
|
||||
import os
|
||||
from collections import defaultdict
|
||||
|
||||
import requests
|
||||
import yaml
|
||||
from fastapi import Depends, FastAPI, HTTPException, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse, HTMLResponse
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from frozendict import frozendict
|
||||
from qubed import Qube
|
||||
from qubed.tree_formatters import node_tree_to_html
|
||||
|
||||
app = FastAPI()
|
||||
security = HTTPBearer()
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
app.mount("/static", StaticFiles(directory="static"), name="static")
|
||||
templates = Jinja2Templates(directory="templates")
|
||||
|
||||
qubes: dict[str, Qube] = {}
|
||||
# print("Getting climate and extremes dt data from github")
|
||||
# try:
|
||||
# qubes["climate-dt"] = Qube.from_json(
|
||||
# requests.get(
|
||||
# "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json",
|
||||
# timeout=3).json()
|
||||
# )
|
||||
# qubes["extremes-dt"] = Qube.from_json(
|
||||
# requests.get(
|
||||
# "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/extremes_dt.json",
|
||||
# timeout=3).json()
|
||||
# )
|
||||
# mars_language = yaml.safe_load(
|
||||
# requests.get(
|
||||
# "https://github.com/ecmwf/qubed/raw/refs/heads/main/config/climate-dt/language.yaml",
|
||||
# timeout=3).content
|
||||
# )
|
||||
# except:
|
||||
qubes["climate-dt"] = Qube.empty()
|
||||
qubes["extremes-dt"] = Qube.empty()
|
||||
mars_language = {}
|
||||
|
||||
if "LOCAL_CACHE" in os.environ:
|
||||
print("Getting climate and extremes dt data from local files")
|
||||
with open("../tests/example_qubes/climate_dt.json") as f:
|
||||
qubes["climate-dt"] = Qube.from_json(json.load(f))
|
||||
|
||||
with open("../tests/example_qubes/extremes_dt.json") as f:
|
||||
qubes["climate-dt"] = qubes["climate-dt"] | Qube.from_json(json.load(f))
|
||||
|
||||
with open("../tests/example_qubes/od.json") as f:
|
||||
qubes["climate-dt"] = qubes["climate-dt"] | Qube.from_json(json.load(f))
|
||||
|
||||
with open("../config/language/language.yaml", "r") as f:
|
||||
mars_language = yaml.safe_load(f)["_field"]
|
||||
|
||||
with open("../config/language/paramids.yaml", "r") as f:
|
||||
params = yaml.safe_load(f)
|
||||
else:
|
||||
print("Getting climate and extremes dt data from github")
|
||||
qubes["climate-dt"] = Qube.from_json(
|
||||
requests.get(
|
||||
"https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json",
|
||||
timeout=1,
|
||||
).json()
|
||||
)
|
||||
qubes["extremes-dt"] = Qube.from_json(
|
||||
requests.get(
|
||||
"https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/extremes_dt.json",
|
||||
timeout=1,
|
||||
).json()
|
||||
)
|
||||
|
||||
qubes["od"] = Qube.from_json(
|
||||
requests.get(
|
||||
"https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/od.json",
|
||||
timeout=1,
|
||||
).json()
|
||||
)
|
||||
qubes["climate-dt"] = qubes["climate-dt"] | qubes["extremes-dt"] | qubes["od"]
|
||||
mars_language = yaml.safe_load(
|
||||
requests.get(
|
||||
"https://github.com/ecmwf/qubed/raw/refs/heads/main/config/climate-dt/language.yaml",
|
||||
timeout=3,
|
||||
).content
|
||||
)["_field"]
|
||||
|
||||
if "API_KEY" in os.environ:
|
||||
api_key = os.environ["API_KEY"]
|
||||
else:
|
||||
with open("api_key.secret", "r") as f:
|
||||
api_key = f.read()
|
||||
|
||||
print("Ready to serve requests!")
|
||||
|
||||
|
||||
def validate_key(key: str):
|
||||
if key not in qubes:
|
||||
raise HTTPException(status_code=404, detail=f"Qube {key} not found")
|
||||
return key
|
||||
|
||||
|
||||
async def get_body_json(request: Request):
|
||||
return await request.json()
|
||||
|
||||
|
||||
def parse_request(request: Request) -> dict[str, str | list[str]]:
|
||||
# Convert query parameters to dictionary format
|
||||
request_dict = dict(request.query_params)
|
||||
for key, value in request_dict.items():
|
||||
# Convert comma-separated values into lists
|
||||
if "," in value:
|
||||
request_dict[key] = value.split(",")
|
||||
|
||||
return request_dict
|
||||
|
||||
|
||||
def validate_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
|
||||
if credentials.credentials != api_key:
|
||||
raise HTTPException(status_code=403, detail="Incorrect API Key")
|
||||
return credentials
|
||||
|
||||
|
||||
@app.get("/favicon.ico", include_in_schema=False)
|
||||
async def favicon():
|
||||
return FileResponse("favicon.ico")
|
||||
|
||||
|
||||
@app.get("/", response_class=HTMLResponse)
|
||||
async def read_root(request: Request):
|
||||
return templates.TemplateResponse(
|
||||
"index.html",
|
||||
{
|
||||
"request": request,
|
||||
"config": {
|
||||
"message": "Hello from the dev server!",
|
||||
},
|
||||
"api_url": os.environ.get("API_URL", "/api/v1/"),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@app.get("/api/v1/keys/")
|
||||
async def keys():
|
||||
return list(qubes.keys())
|
||||
|
||||
|
||||
@app.get("/api/v1/get/{key}/")
|
||||
async def get(
|
||||
key: str = Depends(validate_key),
|
||||
request: dict[str, str | list[str]] = Depends(parse_request),
|
||||
):
|
||||
return qubes[key].to_json()
|
||||
|
||||
|
||||
@app.post("/api/v1/union/{key}/")
|
||||
async def union(
|
||||
key: str,
|
||||
credentials: HTTPAuthorizationCredentials = Depends(validate_api_key),
|
||||
body_json=Depends(get_body_json),
|
||||
):
|
||||
if key not in qubes:
|
||||
qubes[key] = Qube.empty()
|
||||
|
||||
q = Qube.from_json(body_json)
|
||||
qubes[key] = qubes[key] | q
|
||||
return qubes[key].to_json()
|
||||
|
||||
|
||||
def follow_query(request: dict[str, str | list[str]], qube: Qube):
|
||||
s = qube.select(request, mode="next_level", consume=False)
|
||||
by_path = defaultdict(lambda: {"paths": set(), "values": set()})
|
||||
|
||||
for request, node in s.leaf_nodes():
|
||||
if not node.metadata.get("is_leaf", True):
|
||||
by_path[node.key]["values"].update(node.values.values)
|
||||
by_path[node.key]["paths"].add(frozendict(request))
|
||||
|
||||
return s, [
|
||||
{
|
||||
"paths": list(v["paths"]),
|
||||
"key": key,
|
||||
"values": sorted(v["values"], reverse=True),
|
||||
}
|
||||
for key, v in by_path.items()
|
||||
]
|
||||
|
||||
|
||||
@app.get("/api/v1/select/{key}/")
|
||||
async def select(
|
||||
key: str = Depends(validate_key),
|
||||
request: dict[str, str | list[str]] = Depends(parse_request),
|
||||
):
|
||||
q = qubes[key].select(request)
|
||||
return q.to_json()
|
||||
|
||||
|
||||
@app.get("/api/v1/query/{key}")
|
||||
async def query(
|
||||
key: str = Depends(validate_key),
|
||||
request: dict[str, str | list[str]] = Depends(parse_request),
|
||||
):
|
||||
qube, paths = follow_query(request, qubes[key])
|
||||
return paths
|
||||
|
||||
|
||||
@app.get("/api/v1/basicstac/{key}/{filters:path}")
|
||||
async def basic_stac(filters: str, key: str = Depends(validate_key)):
|
||||
pairs = filters.strip("/").split("/")
|
||||
request = dict(p.split("=") for p in pairs if "=" in p)
|
||||
|
||||
qube, _ = follow_query(request, qubes[key])
|
||||
|
||||
def make_link(child_request):
|
||||
"""Take a MARS Key and information about which paths matched up to this point and use it to make a STAC Link"""
|
||||
kvs = [f"{key}={value}" for key, value in child_request.items()]
|
||||
href = f"/api/v1/basicstac/{key}/{'/'.join(kvs)}"
|
||||
last_key, last_value = list(child_request.items())[-1]
|
||||
|
||||
return {
|
||||
"title": f"{last_key}={last_value}",
|
||||
"href": href,
|
||||
"rel": "child",
|
||||
"type": "application/json",
|
||||
}
|
||||
|
||||
# Format the response as a STAC collection
|
||||
(this_key, this_value), *_ = (
|
||||
list(request.items())[-1] if request else ("root", "root"),
|
||||
None,
|
||||
)
|
||||
key_info = mars_language.get(this_key, {})
|
||||
try:
|
||||
values_info = dict(key_info.get("values", {}))
|
||||
value_info = values_info.get(
|
||||
this_value, f"No info found for value `{this_value}` found."
|
||||
)
|
||||
except ValueError:
|
||||
value_info = f"No info found for value `{this_value}` found."
|
||||
|
||||
if this_key == "root":
|
||||
value_info = "The root node"
|
||||
# key_desc = key_info.get(
|
||||
# "description", f"No description for `key` {this_key} found."
|
||||
# )
|
||||
print(this_key, this_value)
|
||||
|
||||
print(this_key, key_info)
|
||||
stac_collection = {
|
||||
"type": "Catalog",
|
||||
"stac_version": "1.0.0",
|
||||
"id": "root"
|
||||
if not request
|
||||
else "/".join(f"{k}={v}" for k, v in request.items()),
|
||||
"title": f"{this_key}={this_value}",
|
||||
"description": value_info,
|
||||
"links": [make_link(leaf) for leaf in qube.leaves()],
|
||||
# "debug": {
|
||||
# "qube": str(qube),
|
||||
# },
|
||||
}
|
||||
|
||||
return stac_collection
|
||||
|
||||
|
||||
@app.get("/api/v1/stac/{key}/")
|
||||
async def get_STAC(
|
||||
key: str = Depends(validate_key),
|
||||
request: dict[str, str | list[str]] = Depends(parse_request),
|
||||
):
|
||||
qube, paths = follow_query(request, qubes[key])
|
||||
kvs = [
|
||||
f"{k}={','.join(v)}" if isinstance(v, list) else f"{k}={v}"
|
||||
for k, v in request.items()
|
||||
]
|
||||
request_params = "&".join(kvs)
|
||||
|
||||
def make_link(key_name, paths, values):
|
||||
"""Take a MARS Key and information about which paths matched up to this point and use it to make a STAC Link"""
|
||||
href_template = f"/stac?{request_params}{'&' if request_params else ''}{key_name}={{{key_name}}}"
|
||||
|
||||
print(f"{key_name = }")
|
||||
if key_name == "param":
|
||||
print(params)
|
||||
values_from_mars_language = params
|
||||
value_descriptions = [
|
||||
max(params.get(int(v), [""]), key=len) for v in values
|
||||
]
|
||||
print(value_descriptions)
|
||||
else:
|
||||
values_from_mars_language = mars_language.get(key_name, {}).get(
|
||||
"values", []
|
||||
)
|
||||
|
||||
if all(isinstance(v, list) for v in values_from_mars_language):
|
||||
value_descriptions_dict = {
|
||||
k: v[-1]
|
||||
for v in values_from_mars_language
|
||||
if len(v) > 1
|
||||
for k in v[:-1]
|
||||
}
|
||||
value_descriptions = [
|
||||
value_descriptions_dict.get(v, "") for v in values
|
||||
]
|
||||
if not any(value_descriptions):
|
||||
value_descriptions = None
|
||||
|
||||
return {
|
||||
"title": key_name,
|
||||
"uriTemplate": href_template,
|
||||
"rel": "child",
|
||||
"type": "application/json",
|
||||
"variables": {
|
||||
key_name: {
|
||||
"type": "string",
|
||||
"description": mars_language.get(key_name, {}).get(
|
||||
"description", ""
|
||||
),
|
||||
"enum": values,
|
||||
"value_descriptions": value_descriptions,
|
||||
# "paths": paths,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
def value_descriptions(key, values):
|
||||
return {
|
||||
v[0]: v[-1]
|
||||
for v in mars_language.get(key, {}).get("values", [])
|
||||
if len(v) > 1 and v[0] in list(values)
|
||||
}
|
||||
|
||||
descriptions = {
|
||||
key: {
|
||||
"key": key,
|
||||
"values": values,
|
||||
"description": mars_language.get(key, {}).get("description", ""),
|
||||
"value_descriptions": value_descriptions(key, values),
|
||||
}
|
||||
for key, values in request.items()
|
||||
}
|
||||
|
||||
# Format the response as a STAC collection
|
||||
stac_collection = {
|
||||
"type": "Catalog",
|
||||
"stac_version": "1.0.0",
|
||||
"id": "root" if not request else "/stac?" + request_params,
|
||||
"description": "STAC collection representing potential children of this request",
|
||||
"links": [make_link(p["key"], p["paths"], p["values"]) for p in paths],
|
||||
"debug": {
|
||||
# "request": request,
|
||||
"descriptions": descriptions,
|
||||
# "paths": paths,
|
||||
"qube": node_tree_to_html(
|
||||
qube.compress(),
|
||||
collapse=True,
|
||||
depth=10,
|
||||
include_css=False,
|
||||
include_js=False,
|
||||
max_summary_length=200,
|
||||
css_id="qube",
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
return stac_collection
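End to end, the routes above can be exercised with a few lines of `requests`, mirroring the snippet embedded in `templates/index.html`. The base URL below assumes the dev server started by `run.sh` and is only a placeholder:

```python
# Sketch: querying the STAC server defined above (paths from the @app.get routes).
import requests
from qubed import Qube

BASE = "http://localhost:8124/api/v1"          # assumed dev address, see run.sh

print(requests.get(f"{BASE}/keys/").json())    # e.g. ["climate-dt", "extremes-dt", "od"]

# Fetch the subtree matching a partial request and rebuild it as a Qube
resp = requests.get(f"{BASE}/select/climate-dt/", params={"class": "d1"})
q = Qube.from_json(resp.json())
print(q)

# The STAC view of the same selection
stac = requests.get(f"{BASE}/stac/climate-dt/", params={"class": "d1"}).json()
print([link["title"] for link in stac["links"]])
```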
|
5
stac_server/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
fastapi[standard]
|
||||
pe
|
||||
redis
|
||||
frozendict
|
||||
requests
|
3
stac_server/run.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
|
||||
cd "$parent_path"
|
||||
LOCAL_CACHE=True fastapi dev ./main.py --port 8124 --reload
|
3
stac_server/run_prod.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
|
||||
cd "$parent_path"
|
||||
sudo LOCAL_CACHE=True ../../.venv/bin/fastapi dev ./main.py --port 80 --host=0.0.0.0 --reload
|
@@ -1,14 +1,18 @@
|
||||
// app.js
|
||||
|
||||
// const API_BASE_URL = "http://127.0.0.1:8000/tree";
|
||||
|
||||
// Take the query string and stick it on the API URL
|
||||
function getSTACUrlFromQuery() {
|
||||
const params = new URLSearchParams(window.location.search);
|
||||
|
||||
// get current window url and remove path part
|
||||
let api_url = new URL(window.location.href);
|
||||
api_url.pathname = "/tree";
|
||||
if (window.API_URL.startsWith("http")) {
|
||||
// Absolute URL: Use it directly
|
||||
api_url = new URL(window.API_URL);
|
||||
} else {
|
||||
// Relative URL: Combine with the current window's location
|
||||
api_url = new URL(window.location.href);
|
||||
api_url.pathname = window.API_URL;
|
||||
}
|
||||
|
||||
for (const [key, value] of params.entries()) {
|
||||
api_url.searchParams.set(key, value);
|
||||
@@ -65,26 +69,28 @@ function goToNextUrl() {
|
||||
const key_type = item.dataset.keyType;
|
||||
let values = [];
|
||||
|
||||
if (key === "date") {
|
||||
const datePicker = item.querySelector("input[type='date']");
|
||||
//format date as YYYYMMDD
|
||||
const datePicker = item.querySelector("input[type='date']");
|
||||
if (datePicker) {
|
||||
values.push(datePicker.value.replace(/-/g, ""));
|
||||
} else if (key === "time") {
|
||||
const timePicker = item.querySelector("input[type='time']");
|
||||
//format time as HHMM
|
||||
console.log("replace", timePicker.value.replace(":", ""));
|
||||
}
|
||||
|
||||
const timePicker = item.querySelector("input[type='time']");
|
||||
if (timePicker) {
|
||||
values.push(timePicker.value.replace(":", ""));
|
||||
} else if (key_type === "enum") {
|
||||
}
|
||||
|
||||
const enum_checkboxes = item.querySelectorAll(
|
||||
"input[type='checkbox']:checked"
|
||||
);
|
||||
if (enum_checkboxes.length > 0) {
|
||||
values.push(
|
||||
...Array.from(
|
||||
item.querySelectorAll("input[type='checkbox']:checked")
|
||||
).map((checkbox) => checkbox.value)
|
||||
...Array.from(enum_checkboxes).map((checkbox) => checkbox.value)
|
||||
);
|
||||
} else {
|
||||
const any = item.querySelector("input[type='text']");
|
||||
if (any.value !== "") {
|
||||
values.push(any.value);
|
||||
}
|
||||
}
|
||||
|
||||
const any = item.querySelector("input[type='text']");
|
||||
if (any && any.value !== "") {
|
||||
values.push(any.value);
|
||||
}
|
||||
|
||||
// Keep track of whether any new keys are selected
|
||||
@@ -108,7 +114,9 @@ function goToNextUrl() {
|
||||
);
|
||||
|
||||
if (existingIndex !== -1) {
|
||||
// If the key already exists, append the values
|
||||
// If the key already exists,
|
||||
// and the values aren't already in there,
|
||||
// append the values
|
||||
request[existingIndex][1] = [...request[existingIndex][1], ...values];
|
||||
} else {
|
||||
// If the key doesn't exist, add a new entry
|
||||
@@ -127,80 +135,73 @@ async function createCatalogItem(link, itemsContainer) {
|
||||
itemsContainer.appendChild(itemDiv);
|
||||
|
||||
try {
|
||||
// Fetch details for each item/collection asynchronously
|
||||
let base_url = new URL(window.location.href);
|
||||
base_url.pathname = "/tree";
|
||||
let url = new URL(link.href, base_url);
|
||||
console.log("Fetching item details:", url);
|
||||
const response = await fetch(url);
|
||||
const itemData = await response.json();
|
||||
|
||||
// Update the item div with real content
|
||||
itemDiv.classList.remove("loading");
|
||||
itemDiv.innerHTML = ""; // Clear "Loading..." text
|
||||
|
||||
const variables = link["variables"];
|
||||
const key = Object.keys(variables)[0];
|
||||
const variable = variables[key];
|
||||
|
||||
// add data-key attribute to the itemDiv
|
||||
itemDiv.dataset.key = itemData.id;
|
||||
itemDiv.dataset.keyType = itemData.key_type;
|
||||
itemDiv.dataset.key = link.title;
|
||||
itemDiv.dataset.keyType = variable.type;
|
||||
|
||||
const title = document.createElement("h3");
|
||||
title.className = "item-title";
|
||||
title.textContent = itemData.title || "No title available";
|
||||
itemDiv.appendChild(title);
|
||||
itemDiv.innerHTML = `
|
||||
<h3 class="item-title">${link.title || "No title available"}</h3>
|
||||
<p class="item-type">Key Type: ${itemDiv.dataset.keyType || "Unknown"}</p>
|
||||
<p class="item-description">${
|
||||
variable.description ? variable.description.slice(0, 100) : ""
|
||||
}</p>
|
||||
`;
|
||||
|
||||
const key_type = document.createElement("p");
|
||||
key_type.className = "item-type";
|
||||
key_type.textContent = `Key Type: ${itemData.key_type || "Unknown"}`;
|
||||
itemDiv.appendChild(key_type);
|
||||
|
||||
const optional = document.createElement("p");
|
||||
optional.className = "item-type";
|
||||
optional.textContent = `Optional: ${link.optional || "Unknown"}`;
|
||||
itemDiv.appendChild(optional);
|
||||
|
||||
// const id = document.createElement("p");
|
||||
// id.className = "item-id";
|
||||
// id.textContent = `ID: ${itemData.id || link.href.split("/").pop()}`;
|
||||
// itemDiv.appendChild(id);
|
||||
|
||||
const description = document.createElement("p");
|
||||
description.className = "item-description";
|
||||
const descText = itemData.description
|
||||
? itemData.description.slice(0, 100)
|
||||
: "No description available";
|
||||
description.textContent = `${descText}...`;
|
||||
itemDiv.appendChild(description);
|
||||
|
||||
if (itemData.key_type === "date" || itemData.key_type === "time") {
|
||||
// Render a date picker for the "date" key
|
||||
const picker = `<input type="${itemData.id}" name="${itemData.id}">`;
|
||||
//convert picker to HTML node
|
||||
const pickerNode = document
|
||||
.createRange()
|
||||
.createContextualFragment(picker);
|
||||
itemDiv.appendChild(pickerNode);
|
||||
}
|
||||
// Otherwise create a scrollable list with checkboxes for values if available
|
||||
else if (
|
||||
itemData.key_type === "enum" &&
|
||||
itemData.values &&
|
||||
itemData.values.length > 0
|
||||
) {
|
||||
const listContainer = renderCheckboxList(itemData);
|
||||
if (variable.enum && variable.enum.length > 0) {
|
||||
const listContainer = renderCheckboxList(link);
|
||||
itemDiv.appendChild(listContainer);
|
||||
} else {
|
||||
const any = `<input type="text" name="${itemData.id}">`;
|
||||
const any = `<input type="text" name="${link.title}">`;
|
||||
const anyNode = document.createRange().createContextualFragment(any);
|
||||
itemDiv.appendChild(anyNode);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error loading item data:", error);
|
||||
|
||||
// In case of an error, display an error message
|
||||
itemDiv.innerHTML = "<p>Error loading item details</p>";
|
||||
itemDiv.innerHTML = `<p>Error loading item details: ${error}</p>`;
|
||||
}
|
||||
}
|
||||
|
||||
function renderCheckboxList(link) {
|
||||
const variables = link["variables"];
|
||||
const key = Object.keys(variables)[0];
|
||||
const variable = variables[key];
|
||||
const value_descriptions = variable.value_descriptions || [];
|
||||
|
||||
const listContainerHTML = `
|
||||
<div class="item-list-container">
|
||||
<div class="scrollable-list">
|
||||
${variable.enum
|
||||
.map((value, index) => {
|
||||
const labelText = value_descriptions[index]
|
||||
? `${value} - ${value_descriptions[index]}`
|
||||
: value;
|
||||
return `
|
||||
<div class="checkbox-container">
|
||||
<label class="checkbox-label">
|
||||
<input type="checkbox" class="item-checkbox" value="${value}" ${
|
||||
variable.enum.length === 1 ? "checked" : ""
|
||||
}>
|
||||
${labelText}
|
||||
</label>
|
||||
</div>
|
||||
`;
|
||||
})
|
||||
.join("")}
|
||||
</div>
|
||||
</div>
|
||||
`;
|
||||
|
||||
return document.createRange().createContextualFragment(listContainerHTML)
|
||||
.firstElementChild;
|
||||
}
|
||||
|
||||
// Render catalog items in the sidebar
|
||||
function renderCatalogItems(links) {
|
||||
const itemsContainer = document.getElementById("items");
|
||||
@@ -217,39 +218,69 @@ function renderCatalogItems(links) {
|
||||
});
|
||||
}
|
||||
|
||||
// Fetch and display item details
|
||||
async function loadItemDetails(url) {
|
||||
try {
|
||||
const resolved_url = new URL(url, API_BASE_URL);
|
||||
const response = await fetch(resolved_url);
|
||||
const item = await response.json();
|
||||
function renderRequestBreakdown(request, descriptions) {
|
||||
const container = document.getElementById("request-breakdown");
|
||||
const format_value = (key, value) => {
|
||||
return `<span class="value" title="${descriptions[key]["value_descriptions"][value]}">"${value}"</span>`;
|
||||
};
|
||||
|
||||
// Show details in the 'details' panel
|
||||
const itemDetails = document.getElementById("item-details");
|
||||
itemDetails.textContent = JSON.stringify(item, null, 2);
|
||||
} catch (error) {
|
||||
console.error("Error loading item details:", error);
|
||||
}
|
||||
const format_values = (key, values) => {
|
||||
if (values.length === 1) {
|
||||
return format_value(key, values[0]);
|
||||
}
|
||||
return `[${values.map((v) => format_value(key, v)).join(", ")}]`;
|
||||
};
|
||||
|
||||
let html =
|
||||
`{\n` +
|
||||
request
|
||||
.map(
|
||||
([key, values]) =>
|
||||
` <span class="key" title="${
|
||||
descriptions[key]["description"]
|
||||
}">"${key}"</span>: ${format_values(key, values)},`
|
||||
)
|
||||
.join("\n") +
|
||||
`\n}`;
|
||||
container.innerHTML = html;
|
||||
}
|
||||
|
||||
function show_resp_in_sidebar(catalog) {
|
||||
const itemDetails = document.getElementById("item-details");
|
||||
itemDetails.textContent = JSON.stringify(catalog, null, 2);
|
||||
function renderRawSTACResponse(catalog) {
|
||||
const itemDetails = document.getElementById("raw-stac");
|
||||
// create new object without debug key
|
||||
let just_stac = Object.assign({}, catalog);
|
||||
delete just_stac.debug;
|
||||
itemDetails.textContent = JSON.stringify(just_stac, null, 2);
|
||||
|
||||
const debug_container = document.getElementById("debug");
|
||||
debug_container.textContent = JSON.stringify(catalog.debug, null, 2);
|
||||
|
||||
const qube_container = document.getElementById("qube");
|
||||
qube_container.innerHTML = catalog.debug.qube;
|
||||
}
|
||||
|
||||
// Fetch STAC catalog and display items
|
||||
async function fetchCatalog(stacUrl) {
|
||||
async function fetchCatalog(request, stacUrl) {
|
||||
try {
|
||||
const response = await fetch(stacUrl);
|
||||
const catalog = await response.json();
|
||||
// Always load the most recently clicked item on the right-hand side
|
||||
show_resp_in_sidebar(catalog);
|
||||
|
||||
// Render the request breakdown in the sidebar
|
||||
renderRequestBreakdown(request, catalog.debug.descriptions);
|
||||
|
||||
// Show the raw STAC in the sidebar
|
||||
renderRawSTACResponse(catalog);
|
||||
|
||||
// Render the items from the catalog
|
||||
if (catalog.links) {
|
||||
console.log("Fetched STAC catalog:", stacUrl, catalog.links);
|
||||
renderCatalogItems(catalog.links);
|
||||
}
|
||||
|
||||
// Highlight the request and raw STAC
|
||||
hljs.highlightElement(document.getElementById("raw-stac"));
|
||||
hljs.highlightElement(document.getElementById("debug"));
|
||||
hljs.highlightElement(document.getElementById("example-python"));
|
||||
} catch (error) {
|
||||
console.error("Error fetching STAC catalog:", error);
|
||||
}
|
||||
@@ -258,10 +289,11 @@ async function fetchCatalog(stacUrl) {
|
||||
// Initialize the viewer by fetching the STAC catalog
|
||||
function initializeViewer() {
|
||||
const stacUrl = getSTACUrlFromQuery();
|
||||
const request = get_request_from_url();
|
||||
|
||||
if (stacUrl) {
|
||||
console.log("Fetching STAC catalog from query string URL:", stacUrl);
|
||||
fetchCatalog(stacUrl);
|
||||
fetchCatalog(request, stacUrl);
|
||||
} else {
|
||||
console.error("No STAC URL provided in the query string.");
|
||||
}
|
||||
@@ -280,36 +312,3 @@ function initializeViewer() {
|
||||
|
||||
// Call initializeViewer on page load
|
||||
initializeViewer();
|
||||
|
||||
function renderCheckboxList(itemData) {
|
||||
const listContainer = document.createElement("div");
|
||||
listContainer.className = "item-list-container";
|
||||
|
||||
const listLabel = document.createElement("label");
|
||||
listLabel.textContent = "Select values:";
|
||||
listLabel.className = "list-label";
|
||||
|
||||
const scrollableList = document.createElement("div");
|
||||
scrollableList.className = "scrollable-list";
|
||||
|
||||
const checkboxesHtml = itemData.values
|
||||
.map((valueArray) => {
|
||||
const value = Array.isArray(valueArray) ? valueArray[0] : valueArray;
|
||||
const labelText = Array.isArray(valueArray)
|
||||
? valueArray.join(" - ")
|
||||
: valueArray;
|
||||
return `
|
||||
<div class="checkbox-container">
|
||||
<input type="checkbox" class="item-checkbox" value="${value}">
|
||||
<label class="checkbox-label">${labelText}</label>
|
||||
</div>
|
||||
`;
|
||||
})
|
||||
.join("");
|
||||
|
||||
scrollableList.innerHTML = checkboxesHtml;
|
||||
|
||||
listContainer.appendChild(listLabel);
|
||||
listContainer.appendChild(scrollableList);
|
||||
return listContainer;
|
||||
}
|
50
stac_server/static/qube_styles.css
Normal file
@@ -0,0 +1,50 @@
|
||||
pre#qube {
|
||||
font-family: monospace;
|
||||
white-space: pre;
|
||||
font-family: SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;
|
||||
font-size: 12px;
|
||||
line-height: 1.4;
|
||||
|
||||
details {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
.qubed-level a {
|
||||
margin-left: 10px;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
summary {
|
||||
list-style: none;
|
||||
cursor: pointer;
|
||||
text-overflow: ellipsis;
|
||||
overflow: hidden;
|
||||
text-wrap: nowrap;
|
||||
display: block;
|
||||
}
|
||||
|
||||
span.qubed-node:hover {
|
||||
background-color: #f0f0f0;
|
||||
}
|
||||
|
||||
details > summary::after {
|
||||
content: ' ▲';
|
||||
}
|
||||
|
||||
details:not([open]) > summary::after {
|
||||
content: " ▼";
|
||||
}
|
||||
|
||||
.qubed-level {
|
||||
text-overflow: ellipsis;
|
||||
overflow: hidden;
|
||||
text-wrap: nowrap;
|
||||
display: block;
|
||||
}
|
||||
|
||||
summary::-webkit-details-marker {
|
||||
display: none;
|
||||
content: "";
|
||||
}
|
||||
|
||||
}
|
@@ -1,24 +1,39 @@
|
||||
html,
|
||||
body {
|
||||
min-height: 100vh;
|
||||
height: 100%;
|
||||
|
||||
--accent-color: #003399;
|
||||
--background-grey: #f4f4f4;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
padding-left: 0.5em;
|
||||
padding-right: 0.5em;
|
||||
|
||||
}
|
||||
|
||||
#viewer {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
height: 100vh;
|
||||
height: fit-content;
|
||||
min-height: 100vh;
|
||||
}
|
||||
|
||||
#catalog-list {
|
||||
width: 30%;
|
||||
padding: 10px;
|
||||
overflow-y: scroll;
|
||||
background-color: #f4f4f4;
|
||||
background-color: var(--background-grey);
|
||||
border-right: 1px solid #ddd;
|
||||
}
|
||||
|
||||
#catalog-list h2 {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
#details {
|
||||
width: 70%;
|
||||
padding: 10px;
|
||||
@@ -26,13 +41,16 @@ body {
|
||||
|
||||
.sidebar-header {
|
||||
display: flex;
|
||||
justify-content: space-between; /* Center buttons horizontally */
|
||||
margin-bottom: 10px; /* Space below header */
|
||||
height: 3em;
|
||||
justify-content: center;
|
||||
margin-bottom: 10px;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.5em;
|
||||
}
|
||||
|
||||
.sidebar-header button {
|
||||
width: 10em;
|
||||
width: 7em;
|
||||
height: 2em;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
canvas {
|
||||
@@ -50,6 +68,7 @@ canvas {
|
||||
margin-bottom: 10px;
|
||||
border-radius: 5px;
|
||||
transition: background-color 0.2s ease;
|
||||
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
|
||||
.item-title {
|
||||
@@ -64,7 +83,8 @@ canvas {
|
||||
color: #666;
|
||||
}
|
||||
|
||||
.item-id, .item-key-type {
|
||||
.item-id,
|
||||
.item-key-type {
|
||||
font-size: 12px;
|
||||
color: #999;
|
||||
}
|
||||
@@ -76,43 +96,52 @@ canvas {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
#items {
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
.item.selected {
|
||||
background-color: #d4e9ff; /* Lighter blue for selection */
|
||||
border-color: #003399; /* Keep the original ECMWF blue for the border */
|
||||
background-color: var(--background-grey);
|
||||
border-color: var(--accent-color);
|
||||
}
|
||||
|
||||
#item-details {
|
||||
summary h2 {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
.json-pre {
|
||||
white-space: pre-wrap;
|
||||
background-color: #f9f9f9;
|
||||
padding: 10px;
|
||||
/* background-color: #f9f9f9; */
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 5px;
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
|
||||
/* Button styles */
|
||||
button {
|
||||
height: 3em;
|
||||
padding: 10px 20px; /* Padding around button text */
|
||||
margin: 0 5px; /* Margin between buttons */
|
||||
background-color: #003399; /* ECMWF blue */
|
||||
color: white; /* White text color */
|
||||
border: none; /* Remove default button border */
|
||||
cursor: pointer; /* Pointer cursor on hover */
|
||||
border-radius: 5px; /* Rounded corners */
|
||||
transition: background-color 0.3s ease; /* Smooth background color transition */
|
||||
padding: 10px 20px;
|
||||
/* Padding around button text */
|
||||
margin: 0 5px;
|
||||
/* Margin between buttons */
|
||||
background-color: var(--accent-color);
|
||||
/* ECMWF blue */
|
||||
color: white;
|
||||
/* White text color */
|
||||
border: none;
|
||||
/* Remove default button border */
|
||||
cursor: pointer;
|
||||
/* Pointer cursor on hover */
|
||||
border-radius: 5px;
|
||||
/* Rounded corners */
|
||||
transition: background-color 0.3s ease;
|
||||
/* Smooth background color transition */
|
||||
}
|
||||
|
||||
button:hover {
|
||||
background-color: #001f66; /* Darker shade of ECMWF blue on hover */
|
||||
background-color: #001f66;
|
||||
/* Darker shade of ECMWF blue on hover */
|
||||
}
|
||||
|
||||
.item-list-container {
|
||||
margin-top: 20px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.scrollable-list {
|
||||
@@ -122,7 +151,6 @@ button:hover {
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 4px;
|
||||
background-color: #fff;
|
||||
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
|
||||
.checkbox-container {
|
||||
@@ -142,12 +170,48 @@ button:hover {
|
||||
}
|
||||
|
||||
.checkbox-container:hover .checkbox-label {
|
||||
color: #003399;
|
||||
color: var(--accent-color);
|
||||
}
|
||||
|
||||
.list-label {
|
||||
font-weight: bold;
|
||||
margin-bottom: 5px;
|
||||
margin-bottom: 0.5em;
|
||||
display: block;
|
||||
color: #003399;
|
||||
color: var(--accent-color);
|
||||
}
|
||||
|
||||
span.key,
|
||||
span.value {
|
||||
color: #ba2121;
|
||||
|
||||
}
|
||||
|
||||
span.key {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
span.key:hover,
|
||||
span.value:hover {
|
||||
color: #ff2a2a;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
/* Change layout for narrow viewport */
|
||||
@media (max-width: 800px) {
|
||||
#viewer {
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
#catalog-list {
|
||||
width: 100%;
|
||||
border-right: none;
|
||||
}
|
||||
|
||||
#details {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
|
||||
details h2 {
|
||||
font-size: medium;
|
||||
}
|
79
stac_server/templates/index.html
Normal file
@@ -0,0 +1,79 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>ECMWF DestinE STAC Viewer</title>
|
||||
<link rel="stylesheet" href="/static/styles.css" />
|
||||
<link rel="stylesheet" href="/static/qube_styles.css" />
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github.min.css">
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/json.min.js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/python.min.js"></script>
|
||||
<link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>📚</text></svg>">
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<div id="viewer">
|
||||
<div id="catalog-list">
|
||||
<h2>STAC Items</h2>
|
||||
<p>{{ config.get('message', '')}}</p>
|
||||
<p>Select one <strong>or multiple</strong> items and then click next to iteratively build up a full request.</p>
|
||||
<p>Last database update: <time>{{config.get('last_database_update', '')}}</time></p>
|
||||
<div class="sidebar-header">
|
||||
<button id="previous-btn">Previous</button>
|
||||
<a id="stac-anchor"><button id="stac-btn">Raw STAC</button></a>
|
||||
<button id="next-btn">Next</button>
|
||||
</div>
|
||||
|
||||
<div id="items">
|
||||
<!-- Items from the STAC catalog will be rendered here -->
|
||||
</div>
|
||||
</div>
|
||||
<div id="details">
|
||||
<h2>Current Selection</h2>
|
||||
This is a <a href="https://github.com/ecmwf/datacube-spec/blob/main/spec/selection.md">MARS Selection</a> object in JSON format. Hover over a key or value for more info.
|
||||
<!-- Container for the request part, preloaded to prevent layout shift. -->
|
||||
<pre><code id="request-breakdown" class="language-json">
|
||||
{
|
||||
}
|
||||
</code></pre>
|
||||
|
||||
<!-- Container to show the current tree -->
|
||||
<h2>Currently Selected Tree</h2>
|
||||
<p>This shows the data <a href="https://qubed.readthedocs.io/en/latest/quickstart.html">qube</a> that matches the current query. The leaves are the next set of available selections you can make.</p>
|
||||
<pre id = "qube"></pre>
|
||||
|
||||
<details>
|
||||
<summary><h2>Example Qubed Code</h2></summary>
|
||||
See the <a href="https://qubed.readthedocs.io/en/latest/">Qubed documentation</a> for more details.
|
||||
<pre><code id="example-python" class="language-python">
|
||||
# pip install qubed requests
|
||||
import requests
|
||||
from qubed import Qube
|
||||
qube = Qube.from_json(requests.get("{{ api_url }}select/climate-dt/?{{request.url.query}}").json())
|
||||
qube.print()
|
||||
</code></pre>
|
||||
</details>
|
||||
|
||||
<!-- Container for the raw STAC response -->
|
||||
<details>
|
||||
<summary><h2>Raw STAC Response</h2></summary>
|
||||
<p>See the <a href="https://github.com/ecmwf-projects/catalogs/blob/main/structured_stac.md">STAC Extension Proposal</a> for more details on the format.</p>
|
||||
<pre class="json-pre"><code id="raw-stac" class="language-json"></code></pre>
|
||||
</details>
|
||||
|
||||
<!-- Container for the debug response -->
|
||||
<details>
|
||||
<summary><h2>Debug Info</h2></summary>
|
||||
<pre class="json-pre"><code id="debug" class="language-json"></code></pre>
|
||||
</details>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
window.API_URL = "{{ api_url }}stac/climate-dt/";
|
||||
</script>
|
||||
<script src="/static/app.js"></script>
|
||||
</body>
|
||||
</html>
|
110
structured_stac.md
Normal file
@@ -0,0 +1,110 @@
|
||||
# STAC Generalized Datacubes Extension
|
||||
|
||||
- **Title:** Generalized Datacubes
|
||||
- **Identifier:** <https://stac-extensions.github.io/template/v1.0.0/schema.json>
|
||||
- **Field Name Prefix:** generalized_datacube
|
||||
- **Scope:** Catalog
|
||||
- **Extension [Maturity Classification](https://github.com/radiantearth/stac-spec/tree/master/extensions/README.md#extension-maturity):** Proposal
|
||||
- **Owner**: @TomHodson
|
||||
|
||||
This STAC extension borrows the [Draft OGC Records API](https://docs.ogc.org/DRAFTS/20-004.html), specifically the [templated links section](https://docs.ogc.org/DRAFTS/20-004.html#sc_templated_links_with_variables) to give STAC the ability to index very large datasets that conform to a generalised datacube model.
|
||||
|
||||
A typical datacube has a fixed set of dimensions `[a, b, c..]`, each of which has a fixed span `{a: ["temp","rainfall"], b: [1-7], c: [True, False]}`, such that we can access data by indexing, i.e. providing a value for each axis, `a="rainfall", b=1, ...`. A generalized datacube, by our definition, allows the dimensions to change during indexing, so choosing `a="rainfall"` might yield a different set of axes from `a="temp"`.
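As a quick illustration of that difference (a sketch with hypothetical axes, not part of the spec):

```python
# Sketch: in a plain datacube the remaining axes never depend on earlier choices.
plain_datacube = {"a": ["temp", "rainfall"], "b": range(1, 8), "c": [True, False]}

# In a generalized datacube, selecting a value can change which axes remain.
# (Hypothetical axes, purely for illustration.)
def remaining_axes(a: str) -> dict:
    if a == "rainfall":
        return {"b": range(1, 8), "c": [True, False]}
    if a == "temp":
        return {"sensor": ["ground", "satellite"], "b": range(1, 4)}
    raise KeyError(a)

print(remaining_axes("rainfall"))  # {'b': range(1, 8), 'c': [True, False]}
print(remaining_axes("temp"))      # a different set of axes
```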
|
||||
|
||||
The [STAC Datacube][datacube_extension] extension serves the needs of datacubes that appear in STAC as Items or Collections, i.e. as leaves in the tree. This extension instead focuses on allowing STAC to serve as an interface for dynamically exploring the branches of generalised datacubes. It does this by adding additional metadata from the OGC Records standard to the children of Catalog entries.
|
||||
|
||||
In practice, what this proposal does is:
|
||||
|
||||
1. For child items that represent many distinct children, replace `"links":` with `"linkTemplates":` in the Catalog entry. (Following the example of the OGC Records API.)
|
||||
2. For each `rel: Child` object in `linkTemplates`:
|
||||
|
||||
a. Add a `variables` key, following the OGC Records API, whose value is a dictionary with entries like
|
||||
|
||||
```json
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"application/vnd.google-earth.kml+xml",
|
||||
"application/vnd.google-earth.kmz",
|
||||
"image/png",
|
||||
"image/jpeg",
|
||||
"image/gif",
|
||||
"image/png; mode=8bit",
|
||||
"application/x-pdf",
|
||||
"image/svg+xml",
|
||||
"image/tiff"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
b. Add a "uriTemplate" key that specifies how to contruct the resulting URL: i.e `http://hostname.tld/app/index.html?class=od&format={format}`
|
||||
|
||||
This enables a child object to represent a whole axis and its allowed values. Since `href` must now be constructed dynamically, we remove it and rely on the `uriTemplate` attribute to communicate how to construct the URLs corresponding to a particular choice of value or values.
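A client therefore picks a value from `enum` and expands the `uriTemplate` itself. A minimal sketch of that expansion, using plain string substitution rather than a full RFC 6570 implementation, with the link taken from the example below:

```python
# Sketch: expanding a templated child link as a client would.
link_template = {
    "uriTemplate": "http://hostname.tld/app/index.html?class=od&expver={expver}",
    "variables": {"expver": {"type": "string", "enum": ["0001", "xxxx"]}},
}

def expand(template: dict, **chosen: str) -> str:
    for name, spec in template["variables"].items():
        if chosen[name] not in spec["enum"]:
            raise ValueError(f"{chosen[name]!r} is not an allowed value for {name}")
    return template["uriTemplate"].format(**chosen)

print(expand(link_template, expver="0001"))
# http://hostname.tld/app/index.html?class=od&expver=0001
```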
|
||||
|
||||
[gen_datacubes]: https://github.com/ecmwf/datacube-spec
|
||||
[link_objects]: https://github.com/radiantearth/stac-spec/blob/master/commons/links.md#link-object
|
||||
[datacube_extension]: https://github.com/stac-extensions/datacube
|
||||
|
||||
## Examples
|
||||
A typical `Catalog` entry with this extension:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "Catalog",
|
||||
"title": "Operational Data",
|
||||
"id": "rainfall",
|
||||
"stac_version": "1.0.0",
|
||||
"description": "ECMWF's Operational Data Archive",
|
||||
"linkTemplates": [
|
||||
{
|
||||
"rel": "child",
|
||||
"title": "Expver - Experiment Version",
|
||||
"uriTemplate": "http://hostname.tld/app/index.html?class=od&expver={expver}",
|
||||
"type": "application/json",
|
||||
"variables" : {
|
||||
"expver" : {
|
||||
"description": "Experiment version, 0001 selects operational data.",
|
||||
"type" : "string",
|
||||
"enum" : ["0001", "xxxx"],
|
||||
"value_descriptions" : ["Operational Data", "Experimental Data"],
|
||||
"optional" : false,
|
||||
}
|
||||
}
|
||||
""
|
||||
|
||||
},
|
||||
],
|
||||
"stac_extensions": [
|
||||
"https://stac-extensions.github.io/generalised_datacubes/v1.0.0/schema.json"
|
||||
],
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Fields
|
||||
|
||||
The fields in the table below can be used in these parts of STAC documents:
|
||||
|
||||
- [ ] Catalogs
|
||||
- [ ] Collections
|
||||
- [ ] Item Properties (incl. Summaries in Collections)
|
||||
- [ ] Assets (for both Collections and Items, incl. Item Asset Definitions in Collections)
|
||||
- [x] Links
|
||||
|
||||
| Field Name | Type | Description |
|
||||
| -------------------- | ------------------------- | --------------------------------------------------------------------------------------------------------------------- |
|
||||
| uriTemplate | URI Template | Of the form "http://hostname.tld/app/index.html?class=od&expver={expver}", follows OGC Records Spec for uriTemplates |
|
||||
| variables | Map of variable name to Variable Object | Describes each template variable: its `type`, allowed values (`enum`), human-readable `value_descriptions` and whether it is `optional` (see the example above) |
|
||||
|
||||
|
||||
|
||||
|
||||
### Additional Field Information
|
||||
|
||||
#### uriTemplate
|
||||
Todo
|
||||
|
||||
|
||||
#### variables
|
||||
Todo
|
12
test_scripts/new_format.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from pathlib import Path
|
||||
|
||||
import orjson as json
|
||||
from tree_traverser.DataCubeTree import CompressedTree
|
||||
|
||||
data_path = Path("./config/climate-dt/new_format.json")
|
||||
with data_path.open("r") as f:
|
||||
compressed_tree = CompressedTree.from_json(json.loads(f.read()))
|
||||
|
||||
compressed_tree = compressed_tree.guess_datatypes()
|
||||
|
||||
compressed_tree.print(depth=10)
|
17
test_scripts/open_climate_dt.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from tree_traverser import CompressedTree
|
||||
|
||||
data_path = Path("./config/climate-dt/compressed_tree.json")
|
||||
# Print size of file
|
||||
print(f"climate dt compressed tree: {data_path.stat().st_size // 1e6:.1f} MB")
|
||||
|
||||
print("Opening json file")
|
||||
compressed_tree = CompressedTree.load(data_path)
|
||||
|
||||
print(compressed_tree.to_json())
|
||||
|
||||
print("Outputting compressed tree ecmwf style")
|
||||
with open("config/climate-dt/new_format.json", "w") as f:
|
||||
json.dump(compressed_tree.to_json(), f)
|
52
test_scripts/reconstruct.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from pathlib import Path
|
||||
|
||||
from tree_traverser import CompressedTree, RefcountedDict
|
||||
|
||||
|
||||
class CompressedTreeFixed(CompressedTree):
|
||||
@classmethod
|
||||
def from_json(cls, data: dict):
|
||||
c = cls({})
|
||||
c.cache = {}
|
||||
ca = data["cache"]
|
||||
for k, v in ca.items():
|
||||
g = {
|
||||
k2: ca[str(v2)]["dict"][k2] if k2 in ca[str(v2)]["dict"] else v2
|
||||
for k2, v2 in v["dict"].items()
|
||||
}
|
||||
c.cache[int(k)] = RefcountedDict(g)
|
||||
c.cache[int(k)].refcount = v["refcount"]
|
||||
|
||||
c.root_hash = data["root_hash"]
|
||||
c.tree = c.cache[c.root_hash]
|
||||
return c
|
||||
|
||||
def reconstruct(self, max_depth=None) -> dict[str, dict]:
|
||||
"Reconstruct the tree as a normal nested dictionary"
|
||||
|
||||
def reconstruct_node(h: int, depth: int) -> dict[str, dict]:
|
||||
if max_depth is not None and depth > max_depth:
|
||||
return {}
|
||||
return {
|
||||
k: reconstruct_node(v, depth=depth + 1)
|
||||
for k, v in self.cache[h].items()
|
||||
}
|
||||
|
||||
return reconstruct_node(self.root_hash, depth=0)
|
||||
|
||||
|
||||
data_path = Path("data/compressed_tree_climate_dt.json")
|
||||
# Print size of file
|
||||
print(f"climate dt compressed tree: {data_path.stat().st_size // 1e6:.1f} MB")
|
||||
|
||||
print("Opening json file")
|
||||
compressed_tree = CompressedTreeFixed.load(data_path)
|
||||
|
||||
output_data_path = Path("data/compressed_tree_climate_dt_ecmwf_style.json")
|
||||
# Print size of file
|
||||
|
||||
compressed_tree.save(output_data_path)
|
||||
|
||||
print(
|
||||
f"climate dt compressed tree ecmwf style: {output_data_path.stat().st_size // 1e6:.1f} MB"
|
||||
)
|
81
test_scripts/rust.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Sequence
|
||||
|
||||
from qubed.rust import Qube as rsQube
|
||||
|
||||
# q = pyQube.from_tree("""
|
||||
# root, class=d1
|
||||
# ├── dataset=another-value, generation=1/2/3
|
||||
# └── dataset=climate-dt/weather-dt, generation=1/2/3/4
|
||||
# """)
|
||||
# json_str = json.dumps(q.to_json())
|
||||
# rust_qube = Qube.from_json(json_str)
|
||||
# # print(repr(rust_qube))
|
||||
|
||||
# # print(json_str)
|
||||
|
||||
# expected = """root, class=d1
|
||||
# ├── dataset=another-value, generation=1/2/3
|
||||
# └── dataset=climate-dt/weather-dt, generation=1/2/3/4
|
||||
# """
|
||||
# assert repr(rust_qube) == expected
|
||||
# # print(rs_qube._repr_html_())
|
||||
|
||||
# print(q | q)
|
||||
|
||||
value = str | int | float | datetime
|
||||
|
||||
|
||||
class Qube(rsQube):
|
||||
@classmethod
|
||||
def empty(cls):
|
||||
q = cls()
|
||||
print(f"empty called {cls = } {q = }")
|
||||
return q
|
||||
|
||||
@classmethod
|
||||
def from_datacube(cls, datacube: dict[str, value | Sequence[value]]) -> Qube:
|
||||
qube = cls.empty()
|
||||
(key, values), *key_vals = list(datacube.items())
|
||||
node = qube.add_node(qube.root, key, values)
|
||||
for key, values in key_vals:
|
||||
node = qube.add_node(parent=node, key=key, values=values)
|
||||
|
||||
return qube
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, d: dict) -> Qube:
|
||||
q = cls.empty()
|
||||
|
||||
def from_dict(parent, d: dict):
|
||||
for k, children in d.items():
|
||||
key, values = k.split("=")
|
||||
values = values.split("/")
|
||||
|
||||
node = q.add_node(
|
||||
parent=parent,
|
||||
key=key,
|
||||
values=values,
|
||||
)
|
||||
from_dict(parent=node, d=children)
|
||||
|
||||
from_dict(q.root, d)
|
||||
return q
|
||||
|
||||
|
||||
q = Qube.from_datacube({"a": ["4"], "b": "test", "c": ["1", "2", "3"]})
|
||||
|
||||
print(q)
|
||||
print(repr(q))
|
||||
|
||||
q = Qube.from_dict(
|
||||
{
|
||||
"a=2/3": {"b=1": {}},
|
||||
"a2=a/b": {"b2=1/2": {}},
|
||||
}
|
||||
)
|
||||
|
||||
print(q)
|
||||
print(repr(q))
|
69
test_scripts/test.py
Normal file
@@ -0,0 +1,69 @@
from tree_traverser import backend, CompressedTree
import datetime
import psutil
from tqdm import tqdm
from pathlib import Path
import json
from more_itertools import chunked

process = psutil.Process()


def massage_request(r):
    return {k: v if isinstance(v, list) else [v] for k, v in r.items()}


if __name__ == "__main__":
    config = """
    ---
    type: remote
    host: databridge-prod-catalogue1-ope.ewctest.link
    port: 10000
    engine: remote
    store: remote
    """

    request = {
        "class": "d1",
        "dataset": "climate-dt",
        # "date": "19920420",
    }

    data_path = Path("data/compressed_tree_climate_dt.json")
    if not data_path.exists():
        compressed_tree = CompressedTree({})
    else:
        compressed_tree = CompressedTree.load(data_path)

    fdb = backend.PyFDB(fdb_config=config)

    visited_path = Path("data/visited_dates.json")
    if not visited_path.exists():
        visited_dates = set()
    else:
        with open(visited_path, "r") as f:
            visited_dates = set(json.load(f))

    today = datetime.datetime.today()
    start = datetime.datetime.strptime("19920420", "%Y%m%d")
    date_list = [
        start + datetime.timedelta(days=x) for x in range((today - start).days)
    ]
    # Compare formatted dates so already-visited days are actually skipped.
    date_list = [
        d.strftime("%Y%m%d")
        for d in date_list
        if d.strftime("%Y%m%d") not in visited_dates
    ]
    for dates in chunked(tqdm(date_list), 5):
        print(dates[0])
        print(f"Memory usage: {(process.memory_info().rss) / 1e6:.1f} MB")

        r = request | dict(date=dates)
        tree = fdb.traverse_fdb(massage_request(r))

        compressed_tree.insert_tree(tree)
        compressed_tree.save(data_path)

        for date in dates:
            visited_dates.add(date)

        with open(visited_path, "w") as f:
            json.dump(list(visited_dates), f)

    # print(compressed_tree.reconstruct_compressed_ecmwf_style())
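A minimal sketch of what massage_request produces for a mixed request; the values below are illustrative, not taken from the diff:

r = {"class": "d1", "dataset": "climate-dt", "date": ["19920420", "19920421"]}
# Scalars are wrapped in single-element lists, existing lists pass through unchanged.
assert massage_request(r) == {
    "class": ["d1"],
    "dataset": ["climate-dt"],
    "date": ["19920420", "19920421"],
}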
99
test_scripts/update_dts.py
Normal file
@@ -0,0 +1,99 @@
# Example script for ingesting data from an fdb into a qube
# Notes
# Uses fdb --compact
# Splits by date in order to avoid out of memory problems with fdb --compact
# Does a bit of processing like removing "year" and "month" keys
# Might want to add datatypes and reordering of keys there too

import json
import subprocess
from datetime import datetime, timedelta
from time import time

import psutil
from qubed import Qube
from tqdm import tqdm
import requests

process = psutil.Process()

CHUNK_SIZE = timedelta(days=60)
FILEPATH = "tests/example_qubes/full_dt.json"
API = "https://qubed.lumi.apps.dte.destination-earth.eu/api/v1"

with open("config/api.secret", "r") as f:
    secret = f.read()


def ecmwf_date(d):
    return d.strftime("%Y%m%d")


start_date = datetime.now() - timedelta(days=120)
# start_date = datetime(1990, 1, 1)
# end_date = datetime.now()
end_date = datetime(2026, 1, 1)

current_span = [end_date - CHUNK_SIZE, end_date]

try:
    qube = Qube.load(FILEPATH)
except Exception:
    qube = Qube.empty()

while current_span[0] > start_date:
    for config in ["config/config-climate-dt.yaml", "config/config-extremes-dt.yaml"]:
        t0 = time()
        start, end = map(ecmwf_date, current_span)
        print(f"Doing {config} {current_span[0].date()} - {current_span[1].date()}")
        print(f"Current memory usage: {process.memory_info().rss / 1e9:.2g}GB")
        print(f"{qube.n_nodes = }, {qube.n_leaves = },")

        subqube = Qube.empty()
        command = [
            f"fdb list --compact --config {config} --minimum-keys=date class=d1,date={start}/{end}"
        ]
        try:
            p = subprocess.run(
                command,
                text=True,
                shell=True,
                stderr=subprocess.PIPE,
                stdout=subprocess.PIPE,
                check=True,
            )
        except Exception as e:
            print(f"Failed for {current_span} {e}")
            continue

        print("Got compact list")
        for i, line in tqdm(enumerate(list(p.stdout.split("\n")))):
            if not line.startswith("retrieve,class="):
                continue

            def split(t):
                return t[0], t[1].split("/")

            # Could do datatypes here
            request = dict(split(v.split("=")) for v in line.strip().split(",")[1:])
            request.pop("year", None)
            request.pop("month", None)
            # Could do things like date = year + month + day
            q = Qube.from_datacube(request)
            subqube = subqube | q
        print("added to qube")

        qube = qube | subqube
        subqube.print(depth=2)
        print(f"{subqube.n_nodes = }, {subqube.n_leaves = },")

        requests.post(
            API + "/union/climate-dt/",
            headers={"Authorization": f"Bearer {secret}"},
            json=subqube.to_json(),
        )

    current_span = [current_span[0] - CHUNK_SIZE, current_span[0]]
    print(
        f"Did that taking {(time() - t0) / CHUNK_SIZE.days:.2g} seconds per day ingested, total {(time() - t0):.2g}s"
    )
    with open(FILEPATH, "w") as f:
        json.dump(qube.to_json(), f)
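A minimal sketch of the parsing step above: how one compact listing line becomes the request dict fed to Qube.from_datacube. The example line and its keys are hypothetical, chosen only to match the "retrieve,class=..." shape the loop filters for:

line = "retrieve,class=d1,dataset=climate-dt,date=20240101/20240102,param=130"
# Drop the leading "retrieve", split each item on "=", and split values on "/".
request = dict(
    (key, value.split("/"))
    for key, value in (item.split("=") for item in line.strip().split(",")[1:])
)
# {'class': ['d1'], 'dataset': ['climate-dt'], 'date': ['20240101', '20240102'], 'param': ['130']}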
BIN
tests/data/fdb_list_compact.gz
Normal file
Binary file not shown.
BIN
tests/data/fdb_list_porcelain.gz
Normal file
Binary file not shown.
BIN
tests/data/mars_list.gz
Normal file
Binary file not shown.
1
tests/example_qubes/cads.json
Normal file
File diff suppressed because one or more lines are too long
1
tests/example_qubes/climate_dt.json
Normal file
File diff suppressed because one or more lines are too long
1
tests/example_qubes/climate_dt_old_schema.json
Normal file
File diff suppressed because one or more lines are too long
1
tests/example_qubes/extremes_dt.json
Normal file
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff.