Add pre-commit hooks and run them

This commit is contained in:
Tom 2025-02-26 09:11:30 +00:00
parent 162dd48748
commit 68ad80e435
74 changed files with 1093 additions and 745 deletions

View File

@ -19,7 +19,7 @@ permissions:
jobs: jobs:
linux: linux:
runs-on: runs-on:
- ubuntu-latest - ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@ -31,4 +31,4 @@ jobs:
python -m pip install ".[dev]" python -m pip install ".[dev]"
- name: Test - name: Test
run: | run: |
pytest pytest

View File

@ -4,27 +4,39 @@ from pathlib import Path
CARGO_TOML_PATH = Path("Cargo.toml") CARGO_TOML_PATH = Path("Cargo.toml")
# Get the latest Git tag and strip the leading 'v' if present # Get the latest Git tag and strip the leading 'v' if present
def get_git_version(): def get_git_version():
try: try:
version = subprocess.check_output(["git", "describe", "--tags", "--always"], text=True).strip() version = subprocess.check_output(
["git", "describe", "--tags", "--always"], text=True
).strip()
version = re.sub(r"^v", "", version) # Remove leading 'v' version = re.sub(r"^v", "", version) # Remove leading 'v'
return version return version
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
raise RuntimeError("Failed to get Git tag. Make sure you have at least one tag in the repository.") raise RuntimeError(
"Failed to get Git tag. Make sure you have at least one tag in the repository."
)
# Update version in Cargo.toml # Update version in Cargo.toml
def update_cargo_version(new_version): def update_cargo_version(new_version):
cargo_toml = CARGO_TOML_PATH.read_text() cargo_toml = CARGO_TOML_PATH.read_text()
# Replace version in [package] section # Replace version in [package] section
updated_toml = re.sub(r'^version = "[^"]+"', f'version = "{new_version}"', cargo_toml, flags=re.MULTILINE) updated_toml = re.sub(
r'^version = "[^"]+"',
f'version = "{new_version}"',
cargo_toml,
flags=re.MULTILINE,
)
CARGO_TOML_PATH.write_text(updated_toml) CARGO_TOML_PATH.write_text(updated_toml)
if __name__ == "__main__": if __name__ == "__main__":
version = get_git_version() version = get_git_version()
print(f"Parsed version: {version}") print(f"Parsed version: {version}")
update_cargo_version(version) update_cargo_version(version)
print(f"Updated Cargo.toml with version: {version}") print(f"Updated Cargo.toml with version: {version}")

16
.pre-commit-config.yaml Normal file
View File

@ -0,0 +1,16 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
# - id: check-yaml
- id: check-added-large-files
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.7
hooks:
- id: ruff
args: [ --fix ]
- id: ruff-format

View File

@ -26,4 +26,4 @@ python:
- method: pip - method: pip
path: . path: .
extra_requirements: extra_requirements:
- docs - docs

View File

@ -22,4 +22,4 @@ path = "./src/rust/lib.rs"
# rsfdb = { path = "../rsfdb" } # rsfdb = { path = "../rsfdb" }
# [patch.'https://github.com/ecmwf-projects/rsfindlibs'] # [patch.'https://github.com/ecmwf-projects/rsfindlibs']
# rsfindlibs = { path = "../rsfindlibs" } # rsfindlibs = { path = "../rsfindlibs" }

View File

@ -198,4 +198,4 @@
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.

View File

@ -33,7 +33,7 @@ root
└── expver=0002, param=1/2 └── expver=0002, param=1/2
``` ```
Qubed provides all the algorithms on this data structure you would expect such as intersection/union/difference, compression, search, filtering etc. Qubed provides all the algorithms on this data structure you would expect such as intersection/union/difference, compression, search, filtering etc.
In addition to this core datastructure, this repository contains a collection of components designed to deliver user friendly cataloging for datacube data. The STAC Server, Frontend and a periodic job to do tree compression can be deployed together to kubernetes using the [helm chart](./helm_chart). This deployment can then be accessed either via the Query Builder Web interface or the python client. In addition to this core datastructure, this repository contains a collection of components designed to deliver user friendly cataloging for datacube data. The STAC Server, Frontend and a periodic job to do tree compression can be deployed together to kubernetes using the [helm chart](./helm_chart). This deployment can then be accessed either via the Query Builder Web interface or the python client.
@ -45,7 +45,7 @@ In addition to this core datastructure, this repostitory contains a collection o
- 🌟 Implements our proposed [Datacube STAC Extension](./structured_stac.md). - 🌟 Implements our proposed [Datacube STAC Extension](./structured_stac.md).
- 🛠️ Allows efficient traversal of ECMWF's datacubes. - 🛠️ Allows efficient traversal of ECMWF's datacubes.
- Part of the implementation of this is [🌲 Tree Compressor](./tree_compresser), a **compressed tree representation** optimised for storing trees with many duplicated subtrees. - Part of the implementation of this is [🌲 Tree Compressor](./tree_compresser), a **compressed tree representation** optimised for storing trees with many duplicated subtrees.
- 🔗 **[Live Example](https://climate-catalogue.lumi.apps.dte.destination-earth.eu/api/stac?root=root&activity=story-nudging%2Cscenariomip&class=d1)**. - 🔗 **[Live Example](https://climate-catalogue.lumi.apps.dte.destination-earth.eu/api/stac?root=root&activity=story-nudging%2Cscenariomip&class=d1)**.
--- ---
@ -58,7 +58,7 @@ In addition to this core datastructure, this repostitory contains a collection o
--- ---
### TODO: 🐍 [Qubed Python Query Builder](./python_query_builder) ### TODO: 🐍 [Qubed Python Query Builder](./python_query_builder)
> **Python Client** > **Python Client**
- 🤖 A Python client for the **STAC Server**. - 🤖 A Python client for the **STAC Server**.

View File

@ -3,10 +3,10 @@ Initial Python Implementation
[x] Basic Qube datastructure [x] Basic Qube datastructure
[x] Compression [x] Compression
[x] Set Operations (Union, Difference, Intersection...) [x] Set Operations (Union, Difference, Intersection...)
[x] Query with request [x] Query with request
[x] Iteration over leaves [x] Iteration over leaves
[x] Iteration over datacubes [x] Iteration over datacubes
[x] Command line creation from fdb list --compact [x] Command line creation from fdb list --compact
[ ] Set up periodic updates to climate-dt/extremes-dt again [ ] Set up periodic updates to climate-dt/extremes-dt again
[ ] Maybe also do production db? [ ] Maybe also do production db?
[ ] Do mars list to constraints conversion [ ] Do mars list to constraints conversion
@ -47,4 +47,4 @@ Performant Membership Queries
- Identifier membership - Identifier membership
- Datacube query (selection) - Datacube query (selection)
Metadata Storage Metadata Storage

View File

@ -1,6 +1,6 @@
apiVersion: v2 apiVersion: v2
name: stac-server name: stac-server
description: A Helm chart for the STAC Server with frontend, STAC API and caching service. description: A Helm chart for the STAC Server with frontend, STAC API and caching service.
type: application type: application
version: 0.1.0 version: 0.1.0
appVersion: "0.1.0" appVersion: "0.1.0"

View File

@ -32,7 +32,3 @@ spec:
- {{ .Values.ingress.hostname }} - {{ .Values.ingress.hostname }}
secretName: {{ .Values.ingress.tlsSecretName }} secretName: {{ .Values.ingress.tlsSecretName }}
{{- end }} {{- end }}

View File

@ -8,4 +8,4 @@
# file2.txt: |- # file2.txt: |-
# {{ .Files.Get "files/file2.txt" | nindent 2 }} # {{ .Files.Get "files/file2.txt" | nindent 2 }}
# file3.txt: |- # file3.txt: |-
# {{ .Files.Get "files/file3.txt" | nindent 2 }} # {{ .Files.Get "files/file3.txt" | nindent 2 }}

View File

@ -34,4 +34,4 @@ spec:
- protocol: TCP - protocol: TCP
port: {{ .Values.webQueryBuilder.servicePort }} port: {{ .Values.webQueryBuilder.servicePort }}
targetPort: {{ .Values.webQueryBuilder.servicePort }} targetPort: {{ .Values.webQueryBuilder.servicePort }}
type: ClusterIP type: ClusterIP

View File

@ -30,4 +30,4 @@ webQueryBuilder:
ingress: ingress:
enabled: True enabled: True
tlsSecretName: "lumi-wildcard-tls" tlsSecretName: "lumi-wildcard-tls"
hostname: "climate-catalogue.lumi.apps.dte.destination-earth.eu" hostname: "climate-catalogue.lumi.apps.dte.destination-earth.eu"

View File

@ -43,4 +43,3 @@ services:
# volumes: # volumes:
# - ./web_query_builder:/code/web_query_builder # - ./web_query_builder:/code/web_query_builder
# restart: always # restart: always

View File

@ -140,7 +140,7 @@ _field: &_field
- [lwda, long window daily archive] # extremes-dt - [lwda, long window daily archive] # extremes-dt
- [lwwv, long window wave] # extremes-dt - [lwwv, long window wave] # extremes-dt
- [clmn, climate-monthly, Climate run monthly means output] # climate-dt - [clmn, climate-monthly, Climate run monthly means output] # climate-dt
# - [amap, analysis for multianalysis project] # - [amap, analysis for multianalysis project]
# - [ammc, melbourne] # - [ammc, melbourne]
# - [cher, ch, chernobyl] # - [cher, ch, chernobyl]
@ -468,7 +468,7 @@ _field: &_field
type: enum type: enum
multiple: true multiple: true
values: values:
- [20211021, ] - [20211021, ]
year: year:
category: data category: data

File diff suppressed because one or more lines are too long

View File

@ -140,7 +140,7 @@ _field: &_field
- [lwda, long window daily archive] # extremes-dt - [lwda, long window daily archive] # extremes-dt
- [lwwv, long window wave] # extremes-dt - [lwwv, long window wave] # extremes-dt
- [clmn, climate-monthly, Climate run monthly means output] # climate-dt - [clmn, climate-monthly, Climate run monthly means output] # climate-dt
# - [amap, analysis for multianalysis project] # - [amap, analysis for multianalysis project]
# - [ammc, melbourne] # - [ammc, melbourne]
# - [cher, ch, chernobyl] # - [cher, ch, chernobyl]
@ -468,7 +468,7 @@ _field: &_field
type: enum type: enum
multiple: true multiple: true
values: values:
- [20211021, ] - [20211021, ]
year: year:
category: data category: data

View File

@ -140,7 +140,7 @@ _field: &_field
- [lwda, long window daily archive] # extremes-dt - [lwda, long window daily archive] # extremes-dt
- [lwwv, long window wave] # extremes-dt - [lwwv, long window wave] # extremes-dt
- [clmn, climate-monthly, Climate run monthly means output] # climate-dt - [clmn, climate-monthly, Climate run monthly means output] # climate-dt
# - [amap, analysis for multianalysis project] # - [amap, analysis for multianalysis project]
# - [ammc, melbourne] # - [ammc, melbourne]
# - [cher, ch, chernobyl] # - [cher, ch, chernobyl]
@ -468,7 +468,7 @@ _field: &_field
type: enum type: enum
multiple: true multiple: true
values: values:
- [20211021, ] - [20211021, ]
year: year:
category: data category: data

View File

@ -3,7 +3,7 @@ FROM python:3.12-slim AS base
RUN apt-get update && apt-get install -y \ RUN apt-get update && apt-get install -y \
build-essential \ build-essential \
curl \ curl \
openssh-client \ openssh-client \
git \ git \
&& apt-get clean && apt-get clean
@ -30,7 +30,7 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
# COPY config/destinE/schema /config/schema # COPY config/destinE/schema /config/schema
# COPY config/destinE/language.yaml /config/language.yaml # COPY config/destinE/language.yaml /config/language.yaml
COPY ./tree_compresser /code/tree_compresser COPY ./tree_compresser /code/tree_compresser
# Clone the rsfdb and rsfindlibs repos manually because they're private # Clone the rsfdb and rsfindlibs repos manually because they're private
@ -39,7 +39,7 @@ COPY ./tree_compresser /code/tree_compresser
COPY stac_server/deps/rsfdb /code/rsfdb COPY stac_server/deps/rsfdb /code/rsfdb
COPY stac_server/deps/rsfindlibs /code/rsfindlibs COPY stac_server/deps/rsfindlibs /code/rsfindlibs
RUN pip install --no-cache-dir -e /code/tree_compresser RUN pip install --no-cache-dir -e /code/tree_compresser
COPY ./stac_server /code/stac_server COPY ./stac_server /code/stac_server
WORKDIR /code/stac_server WORKDIR /code/stac_server

View File

@ -36,12 +36,12 @@ B
└─── a=2, b=3/4/5, c=2 └─── a=2, b=3/4/5, c=2
``` ```
We pair the two trees and traverse them in tandem, at each level we group the nodes by node key and for every pair of nodes in a group, compute the values only in A, the values only in B and the values in both (the intersection). We pair the two trees and traverse them in tandem, at each level we group the nodes by node key and for every pair of nodes in a group, compute the values only in A, the values only in B and the values in both (the intersection).
``` ```
for node_a in level_A: for node_a in level_A:
for node_b in level_B: for node_b in level_B:
just_A, intersection, just_B = Qube.fused_set_operations( just_A, intersection, just_B = Qube.fused_set_operations(
node_a.values, node_a.values,
node_b.values node_b.values
) )
``` ```
@ -83,4 +83,4 @@ In order to keep the tree compressed as operations are performed on it we define
* Not the node's values. * Not the node's values.
* The keys, values and children of the nodes children, recursively. * The keys, values and children of the nodes children, recursively.
This structural hash lets us identify when two sibling nodes may be able to be merged into one node thus keeping the tree compressed. This structural hash lets us identify when two sibling nodes may be able to be merged into one node thus keeping the tree compressed.

View File

@ -1,4 +1,4 @@
# API # API
## Set Operations ## Set Operations
@ -11,4 +11,3 @@ A = Qube.from_dict({
}) })
A A
``` ```

View File

@ -2,4 +2,4 @@
parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
cd "$parent_path" cd "$parent_path"
sphinx-autobuild . _build sphinx-autobuild . _build

View File

@ -37,7 +37,7 @@ Unfortunately, we have more than one data file. If we are lucky, the set of iden
} }
``` ```
with the property that any particular choice for a value for any key will correspond to a datafile that exists. So this object represents `2x1x3x1x2x2x4 = 96` different datafiles. with the property that any particular choice for a value for any key will correspond to a datafile that exists. So this object represents `2x1x3x1x2x2x4 = 96` different datafiles.
To save space I will also represent this same thing like this: To save space I will also represent this same thing like this:
``` ```
@ -84,4 +84,4 @@ Without the above restriction we could, for example, have:
but we do not allow this because it would mean we would have to take multiple branches in order to find data with `expver=0001`. but we do not allow this because it would mean we would have to take multiple branches in order to find data with `expver=0001`.
``` ```
What we have now is a tree of dense datacubes which represents a single larger sparse datacube in a more compact manner. For want of a better word we'll call it a Qube. What we have now is a tree of dense datacubes which represents a single larger sparse datacube in a more compact manner. For want of a better word we'll call it a Qube.

View File

@ -1,10 +1,10 @@
# Command Line Usage # Command Line Usage
```bash ```bash
fdb list class=rd,expver=0001,... | qubed --from=fdblist --to=text fdb list class=rd,expver=0001,... | qubed --from=fdblist --to=text
``` ```
`--from` options include: `--from` options include:
* `fdblist` * `fdblist`
* `json` * `json`
* `protobuf` * `protobuf`
@ -24,4 +24,4 @@ use `--input` and `--output` to specify input and output files respectively.
There's some handy test data in the `tests/data` directory. For example: There's some handy test data in the `tests/data` directory. For example:
```bash ```bash
gzip -dc tests/data/fdb_list_compact.gz| qubed --from=fdblist gzip -dc tests/data/fdb_list_compact.gz| qubed --from=fdblist
``` ```

View File

@ -6,10 +6,10 @@
# -- Project information ----------------------------------------------------- # -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
project = 'qubed' project = "qubed"
copyright = '2025, Tom Hodson (ECMWF)' copyright = "2025, Tom Hodson (ECMWF)"
author = 'Tom Hodson (ECMWF)' author = "Tom Hodson (ECMWF)"
release = '0.1.0' release = "0.1.0"
# -- General configuration --------------------------------------------------- # -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
@ -20,8 +20,8 @@ extensions = [
"myst_nb", # For parsing markdown "myst_nb", # For parsing markdown
] ]
templates_path = ['_templates'] templates_path = ["_templates"]
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', "jupyter_execute"] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "jupyter_execute"]
source_suffix = { source_suffix = {
@ -32,4 +32,4 @@ source_suffix = {
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
html_theme = "sphinx_rtd_theme" html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"] html_static_path = ["_static"]

View File

@ -5,4 +5,4 @@ To build the develop branch from source install a rust toolchain and pip install
git clone -b develop git@github.com:ecmwf/qubed.git git clone -b develop git@github.com:ecmwf/qubed.git
cd qubed cd qubed
maturin develop maturin develop
``` ```

View File

@ -54,14 +54,3 @@ Distinct datasets: {climate_dt.n_leaves},
Number of nodes in the tree: {climate_dt.n_nodes} Number of nodes in the tree: {climate_dt.n_nodes}
""") """)
``` ```

View File

@ -157,7 +157,7 @@ B = Qube.from_dict({"a=2/3/4" : {"b=j/k/l" : {}},})
A.print(), B.print(); A.print(), B.print();
``` ```
Union: Union:
```{code-cell} python3 ```{code-cell} python3
(A | B).print(); (A | B).print();
@ -188,4 +188,4 @@ Symmetric Difference:
```{code-cell} python3 ```{code-cell} python3
def capitalize(node): return node.replace(key = node.key.capitalize()) def capitalize(node): return node.replace(key = node.key.capitalize())
climate_dt.transform(capitalize).html(depth=1) climate_dt.transform(capitalize).html(depth=1)
``` ```

View File

@ -1,2 +1,2 @@
numpy numpy
scipy scipy

2
fiab/.gitignore vendored
View File

@ -1 +1 @@
!*.json !*.json

View File

@ -9,7 +9,7 @@ Simplest possible product
So we could say "here are all the models with param=2t with lead times in the specified interval" So we could say "here are all the models with param=2t with lead times in the specified interval"
quantiles quantiles
param: param:
float range from 0 - 100 float range from 0 - 100
threshold: threshold:
@ -18,8 +18,8 @@ threshold:
product requirements can be specified as a set of: product requirements can be specified as a set of:
params: one or more params params: one or more params
levels: one or more or all levels: one or more or all
time: time:
- product could be specific to a particular time - product could be specific to a particular time
- could require at least a months worth of data - could require at least a months worth of data
@ -27,11 +27,11 @@ threshold:
make some fake models that have: make some fake models that have:
- fewer params - fewer params
- continuous times vs steps of 6 hours - continuous times vs steps of 6 hours
- -
Could also represent what data is currently cached on disk and be able to then tell the user what stuff they can generate really fast. Could also represent what data is currently cached on disk and be able to then tell the user what stuff they can generate really fast.
API want: API want:
- way to get axis span like what params exist - way to get axis span like what params exist
- -

View File

@ -1,42 +1,48 @@
import json import json
from collections import defaultdict from collections import defaultdict
from qubed import Qube
metadata = json.load(open("raw_anemoi_metadata.json")) metadata = json.load(open("raw_anemoi_metadata.json"))
predicted_indices = [*metadata['data_indices']['data']['output']['prognostic'], *metadata['data_indices']['data']['output']['diagnostic']] predicted_indices = [
variables = metadata['dataset']["variables"] *metadata["data_indices"]["data"]["output"]["prognostic"],
*metadata["data_indices"]["data"]["output"]["diagnostic"],
]
variables = metadata["dataset"]["variables"]
variables = [variables[i] for i in predicted_indices] variables = [variables[i] for i in predicted_indices]
# print('Raw Model Variables:', variables) # print('Raw Model Variables:', variables)
# Split variables between pressure and surface # Split variables between pressure and surface
surface_variables = [v for v in variables if '_' not in v] surface_variables = [v for v in variables if "_" not in v]
# Collect the levels for each pressure variable # Collect the levels for each pressure variable
level_variables = defaultdict(list) level_variables = defaultdict(list)
for v in variables: for v in variables:
if '_' in v: if "_" in v:
variable, level = v.split("_") variable, level = v.split("_")
level_variables[variable].append(int(level)) level_variables[variable].append(int(level))
# print(level_variables) # print(level_variables)
# Use qubed library to contruct tree
from qubed import Qube
model_tree = Qube.empty() model_tree = Qube.empty()
for variable, levels in level_variables.items(): for variable, levels in level_variables.items():
model_tree = model_tree | Qube.from_datacube({ model_tree = model_tree | Qube.from_datacube(
"levtype": "pl", {
"param" : variable, "levtype": "pl",
"level" : levels, "param": variable,
}) "level": levels,
}
)
for variable in surface_variables: for variable in surface_variables:
model_tree = model_tree | Qube.from_datacube({ model_tree = model_tree | Qube.from_datacube(
"levtype": "sfc", {
"param" : variable, "levtype": "sfc",
}) "param": variable,
}
)
print(model_tree.to_json()) print(model_tree.to_json())

File diff suppressed because one or more lines are too long

View File

@ -8,7 +8,7 @@
class: rd class: rd
stream: anemoi stream: anemoi
expver: something expver: something
lead_time: lead_time:
type: datetime type: datetime
format: '%Y-%m-%d %H:%M:%S' format: '%Y-%m-%d %H:%M:%S'
step: 6h step: 6h
@ -41,7 +41,7 @@
class: rd class: rd
stream: anemoi stream: anemoi
expver: something expver: something
lead_time: lead_time:
type: datetime type: datetime
format: '%Y-%m-%d %H:%M:%S' format: '%Y-%m-%d %H:%M:%S'
step: 6h step: 6h
@ -64,4 +64,4 @@
- node: ocean_variables - node: ocean_variables
cube: cube:
param: ["saltiness", "number of eels", "is_blue", "good_for_surfing"] param: ["saltiness", "number of eels", "is_blue", "good_for_surfing"]
ocean_levels: [??, ??] ocean_levels: [??, ??]

View File

@ -6,16 +6,19 @@ from typing import Any, Callable, Iterable, Literal
@dataclass(frozen=True) @dataclass(frozen=True)
class HTML(): class HTML:
html: str html: str
def _repr_html_(self): def _repr_html_(self):
return self.html return self.html
@dataclass(frozen=True) @dataclass(frozen=True)
class Values(ABC): class Values(ABC):
@abstractmethod @abstractmethod
def summary(self) -> str: def summary(self) -> str:
pass pass
@abstractmethod @abstractmethod
def __len__(self) -> int: def __len__(self) -> int:
pass pass
@ -25,30 +28,37 @@ class Values(ABC):
pass pass
@abstractmethod @abstractmethod
def from_strings(self, values: list[str]) -> list['Values']: def from_strings(self, values: list[str]) -> list["Values"]:
pass pass
@dataclass(frozen=True) @dataclass(frozen=True)
class Enum(Values): class Enum(Values):
""" """
The simplest kind of key value is just a list of strings. The simplest kind of key value is just a list of strings.
summary -> string1/string2/string.... summary -> string1/string2/string....
""" """
values: list[Any] values: list[Any]
def __len__(self) -> int: def __len__(self) -> int:
return len(self.values) return len(self.values)
def summary(self) -> str: def summary(self) -> str:
return '/'.join(sorted(self.values)) return "/".join(sorted(self.values))
def __contains__(self, value: Any) -> bool: def __contains__(self, value: Any) -> bool:
return value in self.values return value in self.values
def from_strings(self, values: list[str]) -> list['Values']:
def from_strings(self, values: list[str]) -> list["Values"]:
return [Enum(values)] return [Enum(values)]
@dataclass(frozen=True) @dataclass(frozen=True)
class Range(Values, ABC): class Range(Values, ABC):
dtype: str = dataclasses.field(kw_only=True) dtype: str = dataclasses.field(kw_only=True)
@dataclass(frozen=True) @dataclass(frozen=True)
class DateRange(Range): class DateRange(Range):
start: date start: date
@ -57,54 +67,67 @@ class DateRange(Range):
dtype: Literal["date"] = dataclasses.field(kw_only=True, default="date") dtype: Literal["date"] = dataclasses.field(kw_only=True, default="date")
@classmethod @classmethod
def from_strings(self, values: list[str]) -> list['DateRange']: def from_strings(self, values: list[str]) -> list["DateRange"]:
dates = sorted([datetime.strptime(v, "%Y%m%d") for v in values]) dates = sorted([datetime.strptime(v, "%Y%m%d") for v in values])
if len(dates) < 2: if len(dates) < 2:
return [DateRange( return [DateRange(start=dates[0], end=dates[0], step=timedelta(days=0))]
start=dates[0],
end=dates[0],
step=timedelta(days=0)
)]
ranges = [] ranges = []
current_range, dates = [dates[0],], dates[1:] current_range, dates = (
[
dates[0],
],
dates[1:],
)
while len(dates) > 1: while len(dates) > 1:
if dates[0] - current_range[-1] == timedelta(days=1): if dates[0] - current_range[-1] == timedelta(days=1):
current_range.append(dates.pop(0)) current_range.append(dates.pop(0))
elif len(current_range) == 1: elif len(current_range) == 1:
ranges.append(DateRange( ranges.append(
start=current_range[0], DateRange(
end=current_range[0], start=current_range[0],
step=timedelta(days=0) end=current_range[0],
)) step=timedelta(days=0),
current_range = [dates.pop(0),] )
)
current_range = [
dates.pop(0),
]
else: else:
ranges.append(DateRange( ranges.append(
start=current_range[0], DateRange(
end=current_range[-1], start=current_range[0],
step=timedelta(days=1) end=current_range[-1],
)) step=timedelta(days=1),
current_range = [dates.pop(0),] )
)
current_range = [
dates.pop(0),
]
return ranges return ranges
def __contains__(self, value: Any) -> bool: def __contains__(self, value: Any) -> bool:
v = datetime.strptime(value, "%Y%m%d").date() v = datetime.strptime(value, "%Y%m%d").date()
return self.start <= v <= self.end and (v - self.start) % self.step == 0 return self.start <= v <= self.end and (v - self.start) % self.step == 0
def __len__(self) -> int: def __len__(self) -> int:
return (self.end - self.start) // self.step return (self.end - self.start) // self.step
def summary(self) -> str: def summary(self) -> str:
def fmt(d): return d.strftime("%Y%m%d") def fmt(d):
return d.strftime("%Y%m%d")
if self.step == timedelta(days=0): if self.step == timedelta(days=0):
return f"{fmt(self.start)}" return f"{fmt(self.start)}"
if self.step == timedelta(days=1): if self.step == timedelta(days=1):
return f"{fmt(self.start)}/to/{fmt(self.end)}" return f"{fmt(self.start)}/to/{fmt(self.end)}"
return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step // timedelta(days=1)}" return (
f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step // timedelta(days=1)}"
)
@dataclass(frozen=True) @dataclass(frozen=True)
class TimeRange(Range): class TimeRange(Range):
@ -114,53 +137,58 @@ class TimeRange(Range):
dtype: Literal["time"] = dataclasses.field(kw_only=True, default="time") dtype: Literal["time"] = dataclasses.field(kw_only=True, default="time")
@classmethod @classmethod
def from_strings(self, values: list[str]) -> list['TimeRange']: def from_strings(self, values: list[str]) -> list["TimeRange"]:
if len(values) == 0: return [] if len(values) == 0:
return []
times = sorted([int(v) for v in values]) times = sorted([int(v) for v in values])
if len(times) < 2: if len(times) < 2:
return [TimeRange( return [TimeRange(start=times[0], end=times[0], step=100)]
start=times[0],
end=times[0],
step=100
)]
ranges = [] ranges = []
current_range, times = [times[0],], times[1:] current_range, times = (
[
times[0],
],
times[1:],
)
while len(times) > 1: while len(times) > 1:
if times[0] - current_range[-1] == 1: if times[0] - current_range[-1] == 1:
current_range.append(times.pop(0)) current_range.append(times.pop(0))
elif len(current_range) == 1: elif len(current_range) == 1:
ranges.append(TimeRange( ranges.append(
start=current_range[0], TimeRange(start=current_range[0], end=current_range[0], step=0)
end=current_range[0], )
step=0 current_range = [
)) times.pop(0),
current_range = [times.pop(0),] ]
else: else:
ranges.append(TimeRange( ranges.append(
start=current_range[0], TimeRange(start=current_range[0], end=current_range[-1], step=1)
end=current_range[-1], )
step=1 current_range = [
)) times.pop(0),
current_range = [times.pop(0),] ]
return ranges return ranges
def __len__(self) -> int: def __len__(self) -> int:
return (self.end - self.start) // self.step return (self.end - self.start) // self.step
def summary(self) -> str: def summary(self) -> str:
def fmt(d): return f"{d:04d}" def fmt(d):
return f"{d:04d}"
if self.step == 0: if self.step == 0:
return f"{fmt(self.start)}" return f"{fmt(self.start)}"
return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}" return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
def __contains__(self, value: Any) -> bool: def __contains__(self, value: Any) -> bool:
v = int(value) v = int(value)
return self.start <= v <= self.end and (v - self.start) % self.step == 0 return self.start <= v <= self.end and (v - self.start) % self.step == 0
@dataclass(frozen=True) @dataclass(frozen=True)
class IntRange(Range): class IntRange(Range):
dtype: Literal["int"] dtype: Literal["int"]
@ -171,33 +199,41 @@ class IntRange(Range):
def __len__(self) -> int: def __len__(self) -> int:
return (self.end - self.start) // self.step return (self.end - self.start) // self.step
def summary(self) -> str: def summary(self) -> str:
def fmt(d): return d.strftime("%Y%m%d") def fmt(d):
return d.strftime("%Y%m%d")
return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}" return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
def __contains__(self, value: Any) -> bool: def __contains__(self, value: Any) -> bool:
v = int(value) v = int(value)
return self.start <= v <= self.end and (v - self.start) % self.step == 0 return self.start <= v <= self.end and (v - self.start) % self.step == 0
def values_from_json(obj) -> Values: def values_from_json(obj) -> Values:
if isinstance(obj, list): if isinstance(obj, list):
return Enum(obj) return Enum(obj)
match obj["dtype"]: match obj["dtype"]:
case "date": return DateRange(**obj) case "date":
case "time": return TimeRange(**obj) return DateRange(**obj)
case "int": return IntRange(**obj) case "time":
case _: raise ValueError(f"Unknown dtype {obj['dtype']}") return TimeRange(**obj)
case "int":
return IntRange(**obj)
case _:
raise ValueError(f"Unknown dtype {obj['dtype']}")
@dataclass(frozen=True) @dataclass(frozen=True)
class Node: class Node:
key: str key: str
values: Values # Must support len() values: Values # Must support len()
metadata: dict[str, str] # Applies to all children metadata: dict[str, str] # Applies to all children
payload: list[Any] # List of size product(len(n.values) for n in ancestors(self)) payload: list[Any] # List of size product(len(n.values) for n in ancestors(self))
children: list['Node'] children: list["Node"]
def summarize_node(node: Node) -> tuple[str, Node]: def summarize_node(node: Node) -> tuple[str, Node]:
""" """
@ -205,7 +241,7 @@ def summarize_node(node: Node) -> tuple[str, Node]:
Returns the summary string and the last node in the chain that has multiple children. Returns the summary string and the last node in the chain that has multiple children.
""" """
summary = [] summary = []
while True: while True:
values_summary = node.values.summary() values_summary = node.values.summary()
if len(values_summary) > 50: if len(values_summary) > 50:
@ -219,29 +255,35 @@ def summarize_node(node: Node) -> tuple[str, Node]:
return ", ".join(summary), node return ", ".join(summary), node
def node_tree_to_string(node : Node, prefix : str = "", depth = None) -> Iterable[str]:
def node_tree_to_string(node: Node, prefix: str = "", depth=None) -> Iterable[str]:
summary, node = summarize_node(node) summary, node = summarize_node(node)
if depth is not None and depth <= 0: if depth is not None and depth <= 0:
yield summary + " - ...\n" yield summary + " - ...\n"
return return
# Special case for nodes with only a single child, this makes the printed representation more compact # Special case for nodes with only a single child, this makes the printed representation more compact
elif len(node.children) == 1: elif len(node.children) == 1:
yield summary + ", " yield summary + ", "
yield from node_tree_to_string(node.children[0], prefix, depth = depth) yield from node_tree_to_string(node.children[0], prefix, depth=depth)
return return
else: else:
yield summary + "\n" yield summary + "\n"
for index, child in enumerate(node.children): for index, child in enumerate(node.children):
connector = "└── " if index == len(node.children) - 1 else "├── " connector = "└── " if index == len(node.children) - 1 else "├── "
yield prefix + connector yield prefix + connector
extension = " " if index == len(node.children) - 1 else "" extension = " " if index == len(node.children) - 1 else ""
yield from node_tree_to_string(child, prefix + extension, depth = depth - 1 if depth is not None else None) yield from node_tree_to_string(
child, prefix + extension, depth=depth - 1 if depth is not None else None
)
def node_tree_to_html(node : Node, prefix : str = "", depth = 1, connector = "") -> Iterable[str]:
def node_tree_to_html(
node: Node, prefix: str = "", depth=1, connector=""
) -> Iterable[str]:
summary, node = summarize_node(node) summary, node = summarize_node(node)
if len(node.children) == 0: if len(node.children) == 0:
yield f'<span class="leaf">{connector}{summary}</span>' yield f'<span class="leaf">{connector}{summary}</span>'
return return
@ -252,32 +294,36 @@ def node_tree_to_html(node : Node, prefix : str = "", depth = 1, connector = "")
for index, child in enumerate(node.children): for index, child in enumerate(node.children):
connector = "└── " if index == len(node.children) - 1 else "├── " connector = "└── " if index == len(node.children) - 1 else "├── "
extension = " " if index == len(node.children) - 1 else "" extension = " " if index == len(node.children) - 1 else ""
yield from node_tree_to_html(child, prefix + extension, depth = depth - 1, connector = prefix+connector) yield from node_tree_to_html(
child, prefix + extension, depth=depth - 1, connector=prefix + connector
)
yield "</details>" yield "</details>"
@dataclass(frozen=True) @dataclass(frozen=True)
class CompressedTree: class CompressedTree:
root: Node root: Node
@classmethod
def from_json(cls, json: dict) -> "CompressedTree":
    """Build a tree from nested dictionaries.

    Each dictionary needs "key" and "values"; "metadata", "payload" and
    "children" are optional and default to empty. The result is built
    with ``cls`` so subclasses get an instance of their own type.
    """

    def build(spec: dict) -> "Node":
        return Node(
            key=spec["key"],
            values=values_from_json(spec["values"]),
            metadata=spec.get("metadata", {}),
            payload=spec.get("payload", []),
            children=[build(child) for child in spec.get("children", [])],
        )

    return cls(root=build(json))
def __str__(self):
    """Return the text rendering produced by ``node_tree_to_string`` for the root."""
    pieces = node_tree_to_string(node=self.root)
    return "".join(pieces)
def html(self, depth=2) -> HTML:
    """Wrap the markup from ``_repr_html_`` (rendered with ``depth``) in an ``HTML`` object."""
    markup = self._repr_html_(depth=depth)
    return HTML(markup)
def _repr_html_(self, depth = 2): def _repr_html_(self, depth=2):
css = """ css = """
<style> <style>
.qubed-tree-view { .qubed-tree-view {
@ -316,67 +362,100 @@ class CompressedTree:
</style> </style>
""" """
nodes = "".join(cc for c in self.root.children for cc in node_tree_to_html(node=c, depth=depth)) nodes = "".join(
cc
for c in self.root.children
for cc in node_tree_to_html(node=c, depth=depth)
)
return f"{css}<pre class='qubed-tree-view'>{nodes}</pre>" return f"{css}<pre class='qubed-tree-view'>{nodes}</pre>"
def print(self, depth=None):
    """Print the text rendering of every child of the root, passing ``depth`` through."""
    pieces = []
    for child in self.root.children:
        pieces.extend(node_tree_to_string(node=child, depth=depth))
    print("".join(pieces))
def transform(self, func: Callable[[Node], Node]) -> "CompressedTree":
    "Call a function on every node of the tree, any changes to the children of a node will be ignored."

    def rebuild(node: "Node") -> "Node":
        # func sees the node first; its children are then replaced by the
        # transformed children of the ORIGINAL node.
        replacement = func(node)
        new_children = [rebuild(child) for child in node.children]
        return dataclasses.replace(replacement, children=new_children)

    new_root = rebuild(self.root)
    return CompressedTree(root=new_root)
def guess_datatypes(self) -> "CompressedTree":
    """Return a tree in which "time" and "date" enum nodes are converted to ranges.

    A node may become several nodes (one per range returned by
    ``from_strings``); each of them shares the converted children.
    """

    def convert(node: "Node") -> "list[Node]":
        # Children are converted first so every replacement can reuse them.
        converted_children = []
        for child in node.children:
            converted_children.extend(convert(child))

        range_class = None
        if isinstance(node.values, Enum):
            if node.key == "time":
                range_class = TimeRange
            elif node.key == "date":
                range_class = DateRange

        if range_class is None:
            return [dataclasses.replace(node, children=converted_children)]

        return [
            dataclasses.replace(node, values=values, children=converted_children)
            for values in range_class.from_strings(node.values.values)
        ]

    top_level = []
    for child in self.root.children:
        top_level.extend(convert(child))
    return CompressedTree(root=dataclasses.replace(self.root, children=top_level))
def select(
    self,
    selection: dict[str, str | list[str]],
    mode: Literal["strict", "relaxed"] = "relaxed",
) -> "CompressedTree":
    """Return the subtree matching ``selection``.

    Nodes whose key is in ``selection`` keep only the requested values and
    are dropped when none match. Nodes whose key is not in ``selection``
    are kept in "relaxed" mode and dropped in "strict" mode.
    """
    # Normalise every requested value to a list.
    wanted = {}
    for key, value in selection.items():
        wanted[key] = value if isinstance(value, list) else [value]

    def surviving(nodes):
        results = (visit(n) for n in nodes)
        return [r for r in results if r is not None]

    def visit(node: "Node") -> "Node | None":
        if node.key in wanted:
            values = Enum([c for c in wanted[node.key] if c in node.values])
            if not values:
                return None
            return dataclasses.replace(
                node, values=values, children=surviving(node.children)
            )

        if mode == "strict":
            return None
        return dataclasses.replace(node, children=surviving(node.children))

    new_root = dataclasses.replace(self.root, children=surviving(self.root.children))
    return CompressedTree(root=new_root)
def to_list_of_cubes(self):
    """Return every root-to-leaf path of the tree as a list of nodes.

    The previous recursion had no base case: a leaf produced an empty
    list, so every ancestor did too and the result was always ``[]``.
    """

    def paths_from(node: "Node") -> "list[list[Node]]":
        if not node.children:
            # A leaf terminates exactly one path.
            return [[node]]
        return [
            [node] + tail
            for child in node.children
            for tail in paths_from(child)
        ]

    return paths_from(self.root)
@ -384,12 +463,10 @@ class CompressedTree:
cubes = self.to_list_of_cubes() cubes = self.to_list_of_cubes()
print(f"Number of distinct paths: {len(cubes)}") print(f"Number of distinct paths: {len(cubes)}")
# What should the interface look like? # What should the interface look like?
# tree = CompressedTree.from_json(...) # tree = CompressedTree.from_json(...)
# tree = CompressedTree.from_protobuf(...) # tree = CompressedTree.from_protobuf(...)
# tree.print(depth = 5) # Prints a nice tree representation # tree.print(depth = 5) # Prints a nice tree representation

View File

@ -57,4 +57,4 @@ dev = [
"pre-commit", "pre-commit",
"isort", "isort",
] ]

2
run.sh
View File

@ -4,4 +4,4 @@ cd backend
--reload-include="*.html" \ --reload-include="*.html" \
--reload-include="*.css" \ --reload-include="*.css" \
--reload-include="*.js" \ --reload-include="*.js" \
--reload-include="*.yaml" --reload-include="*.yaml"

View File

@ -1,7 +1,7 @@
cd backend cd backend
# sudo ../.venv/bin/fastapi dev main.py --port 80 # sudo ../.venv/bin/fastapi dev main.py --port 80
sudo ../.venv/bin/uvicorn main:app --port 80 --host 0.0.0.0 --reload\ sudo ../.venv/bin/uvicorn main:app --port 80 --host 0.0.0.0 --reload\
--reload-include="*.html" \ --reload-include="*.html" \
--reload-include="*.css" \ --reload-include="*.css" \
--reload-include="*.js" \ --reload-include="*.js" \
--reload-include="*.yaml" --reload-include="*.yaml"

View File

@ -12,4 +12,4 @@ sudo docker build \
--tag=eccr.ecmwf.int/qubed/web_query_builder:latest \ --tag=eccr.ecmwf.int/qubed/web_query_builder:latest \
--target=web_query_builder \ --target=web_query_builder \
. .
sudo docker push eccr.ecmwf.int/qubed/web_query_builder:latest sudo docker push eccr.ecmwf.int/qubed/web_query_builder:latest

View File

@ -1 +1 @@
helm upgrade stac-server chart -n stac-server helm upgrade stac-server chart -n stac-server

View File

@ -15,5 +15,5 @@ with open("config/climate-dt/language.yaml") as f:
mars_language = yaml.safe_load(f)["_field"] mars_language = yaml.safe_load(f)["_field"]
print("Storing data in redis") print("Storing data in redis")
r.set('compressed_catalog', json.dumps(compressed_catalog)) r.set("compressed_catalog", json.dumps(compressed_catalog))
r.set('mars_language', json.dumps(mars_language)) r.set("mars_language", json.dumps(mars_language))

View File

@ -1,3 +1,3 @@
# kubectl rollout restart deployment/redis # kubectl rollout restart deployment/redis
kubectl rollout restart deployment/web-query-builder kubectl rollout restart deployment/web-query-builder
kubectl rollout restart deployment/stac-server kubectl rollout restart deployment/stac-server

View File

@ -1,3 +1,3 @@
python3 -m venv .venv python3 -m venv .venv
source .venv/bin/activate source .venv/bin/activate
pip install pyyaml redis pip install pyyaml redis

View File

@ -19,50 +19,50 @@ from .value_types import QEnum, Values, values_from_json
@dataclass(frozen=False, eq=True, order=True, unsafe_hash=True) @dataclass(frozen=False, eq=True, order=True, unsafe_hash=True)
class Qube: class Qube:
data: NodeData data: NodeData
children: tuple['Qube', ...] children: tuple["Qube", ...]
@property
def key(self) -> str:
    """The key stored in this node's data."""
    return self.data.key
@property
def values(self) -> Values:
    """The values stored in this node's data."""
    return self.data.values
@property
def metadata(self) -> frozendict[str, Any]:
    """The metadata stored in this node's data."""
    return self.data.metadata
def replace(self, **kwargs) -> "Qube":
    """Return a copy with the given fields replaced.

    "key", "values" and "metadata" are applied to the node's data,
    "children" to the node itself; any other keyword is ignored. When
    nothing applies, ``self`` is returned unchanged.
    """
    data_changes = {}
    node_changes = {}
    for name, value in kwargs.items():
        if name in ["key", "values", "metadata"]:
            data_changes[name] = value
        elif name == "children":
            node_changes[name] = value

    if data_changes:
        node_changes["data"] = dataclasses.replace(self.data, **data_changes)

    if not node_changes:
        return self
    return dataclasses.replace(self, **node_changes)
def summary(self) -> str:
    """Return the summary string of this node's data."""
    return self.data.summary()
@classmethod
def make(cls, key: str, values: Values, children, **kwargs) -> "Qube":
    """Create a node whose children are sorted by (key, smallest value).

    ``metadata`` may be passed as a keyword; it defaults to an empty
    ``frozendict``.
    """
    metadata = kwargs.get("metadata", frozendict())
    data = NodeData(key, values, metadata=metadata)

    def sort_key(node):
        return (node.key, node.values.min())

    ordered = sorted(children, key=sort_key)
    return cls(data=data, children=tuple(ordered))
@classmethod
def root_node(cls, children: Iterable["Qube"]) -> "Qube":
    """Create a node with key "root" and the single value "root" above ``children``."""
    return cls.make("root", QEnum(("root",)), children)
@classmethod @classmethod
def from_datacube(cls, datacube: dict[str, str | Sequence[str]]) -> 'Qube': def from_datacube(cls, datacube: dict[str, str | Sequence[str]]) -> "Qube":
key_vals = list(datacube.items())[::-1] key_vals = list(datacube.items())[::-1]
children: list["Qube"] = [] children: list["Qube"] = []
@ -70,12 +70,11 @@ class Qube:
if not isinstance(values, list): if not isinstance(values, list):
values = [values] values = [values]
children = [cls.make(key, QEnum(values), children)] children = [cls.make(key, QEnum(values), children)]
return cls.root_node(children) return cls.root_node(children)
@classmethod @classmethod
def from_json(cls, json: dict) -> 'Qube': def from_json(cls, json: dict) -> "Qube":
def from_json(json: dict) -> Qube: def from_json(json: dict) -> Qube:
return Qube.make( return Qube.make(
key=json["key"], key=json["key"],
@ -83,75 +82,100 @@ class Qube:
metadata=frozendict(json["metadata"]) if "metadata" in json else {}, metadata=frozendict(json["metadata"]) if "metadata" in json else {},
children=(from_json(c) for c in json["children"]), children=(from_json(c) for c in json["children"]),
) )
return from_json(json) return from_json(json)
def to_json(self) -> dict:
    """Serialise this node and its descendants to nested dictionaries.

    Each dictionary has the entries "key", "values", "metadata" and
    "children", in that order.
    """

    def encode(node: "Qube") -> dict:
        encoded = {
            "key": node.key,
            "values": node.values.to_json(),
            "metadata": dict(node.metadata),
        }
        encoded["children"] = [encode(child) for child in node.children]
        return encoded

    return encode(self)
@classmethod
def from_dict(cls, d: dict) -> "Qube":
    """Build a qube from nested dicts keyed by ``"key=value/value"`` strings.

    Each dict value is the nested dict describing that node's children.
    The string is split at the first "=" only, so a value that itself
    contains "=" is kept whole instead of being truncated.
    """

    def build(level: dict) -> "list[Qube]":
        nodes = []
        for spec, children in level.items():
            parts = spec.split("=", 1)
            nodes.append(
                cls.make(
                    key=parts[0],
                    # parts[1] raises IndexError when "=" is missing, as before.
                    values=QEnum((parts[1].split("/"))),
                    children=build(children),
                )
            )
        return nodes

    return cls.root_node(build(d))
@classmethod
def empty(cls) -> "Qube":
    """Return a root node with no children, built via ``cls`` so subclasses are respected."""
    return cls.root_node([])
def __str__(self, depth=None, name=None) -> str:
    """Return the text rendering; if ``name`` is given it is shown as the root's label."""
    node = self
    if name is not None:
        # Swap in RootNodeData so the root prints under the custom name.
        renamed = RootNodeData(key=name, values=self.values, metadata=self.metadata)
        node = dataclasses.replace(self, data=renamed)
    return "".join(node_tree_to_string(node=node, depth=depth))

def print(self, depth=None, name: str | None = None):
    """Print the result of ``__str__`` with the same ``depth`` and ``name``."""
    text = self.__str__(depth=depth, name=name)
    print(text)

def html(self, depth=2, collapse=True, name: str | None = None) -> HTML:
    """Return the ``node_tree_to_html`` rendering wrapped in ``HTML``; ``name`` relabels the root."""
    node = self
    if name is not None:
        renamed = RootNodeData(key=name, values=self.values, metadata=self.metadata)
        node = dataclasses.replace(self, data=renamed)
    return HTML(node_tree_to_html(node=node, depth=depth, collapse=collapse))

def _repr_html_(self) -> str:
    """Return the ``node_tree_to_html`` rendering with depth 2 and collapse enabled."""
    markup = node_tree_to_html(self, depth=2, collapse=True)
    return markup
# Allow "key=value/value" / qube to prepend keys
def __rtruediv__(self, other: str) -> "Qube":
    """Return a new root with a ``key=v1/v2`` node inserted above this qube's children.

    ``other`` is split at the first "=" only, so values containing "="
    no longer break the unpacking. A string without "=" still raises
    ValueError.
    """
    key, values = other.split("=", 1)
    values = QEnum((values.split("/")))
    return Qube.root_node([Qube.make(key, values, self.children)])
def __or__(self, other: "Qube") -> "Qube":
    """Combine with ``other`` via ``set_operations.operation`` using ``SetOperation.UNION``."""
    op = set_operations.SetOperation.UNION
    return set_operations.operation(self, other, op, type(self))
def __and__(self, other: "Qube") -> "Qube":
    """Combine with ``other`` via ``set_operations.operation`` using ``SetOperation.INTERSECTION``."""
    op = set_operations.SetOperation.INTERSECTION
    return set_operations.operation(self, other, op, type(self))
def __sub__(self, other: "Qube") -> "Qube":
    """Combine with ``other`` via ``set_operations.operation`` using ``SetOperation.DIFFERENCE``."""
    op = set_operations.SetOperation.DIFFERENCE
    return set_operations.operation(self, other, op, type(self))
def __xor__(self, other: "Qube") -> "Qube":
    """Combine with ``other`` via ``set_operations.operation`` using ``SetOperation.SYMMETRIC_DIFFERENCE``."""
    op = set_operations.SetOperation.SYMMETRIC_DIFFERENCE
    return set_operations.operation(self, other, op, type(self))
def leaves(self) -> Iterable[dict[str, str]]:
    """Yield one ``{key: value}`` dict per leaf of the qube.

    The "root" key never appears in the dicts. A root with no children
    yields nothing, which agrees with ``n_leaves`` being 0 for an empty
    qube (previously it yielded a spurious ``{"root": "root"}``).
    """
    is_root = self.key == "root"
    if is_root and not self.children:
        return

    for value in self.values:
        if not self.children:
            yield {self.key: value}
            continue
        for child in self.children:
            for leaf in child.leaves():
                yield leaf if is_root else {self.key: value, **leaf}
@ -164,11 +188,10 @@ class Qube:
# print(c) # print(c)
for sub_cube in to_list_of_cubes(c): for sub_cube in to_list_of_cubes(c):
yield dataclasses.replace(node, children=[sub_cube]) yield dataclasses.replace(node, children=[sub_cube])
return Qube.root_node((q for c in self.children for q in to_list_of_cubes(c))) return Qube.root_node((q for c in self.children for q in to_list_of_cubes(c)))
def __getitem__(self, args) -> 'Qube': def __getitem__(self, args) -> "Qube":
if isinstance(args, str): if isinstance(args, str):
specifiers = args.split(",") specifiers = args.split(",")
current = self current = self
@ -180,9 +203,11 @@ class Qube:
current = c current = c
break break
else: else:
raise KeyError(f"Key '{key}' not found in children of '{current.key}'") raise KeyError(
f"Key '{key}' not found in children of '{current.key}'"
)
return Qube.root_node(current.children) return Qube.root_node(current.children)
elif isinstance(args, tuple) and len(args) == 2: elif isinstance(args, tuple) and len(args) == 2:
key, value = args key, value = args
for c in self.children: for c in self.children:
@ -195,72 +220,85 @@ class Qube:
@cached_property
def n_leaves(self) -> int:
    """Number of leaves below (and including) this node."""
    # This line makes the equation q.n_leaves + r.n_leaves == (q | r).n_leaves true if q and r have no overlap
    if self.key == "root" and not self.children:
        return 0

    if self.children:
        below = sum(child.n_leaves for child in self.children)
    else:
        # A childless node contributes one leaf per value.
        below = 1
    return len(self.values) * below
@cached_property
def n_nodes(self) -> int:
    """Number of nodes in this subtree, counting this node; 0 for a childless root."""
    childless_root = self.key == "root" and not self.children
    if childless_root:
        return 0
    total = 1
    for child in self.children:
        total += child.n_nodes
    return total
def transform(self, func: "Callable[[Qube], Qube | Iterable[Qube]]") -> "Qube":
    """
    Call a function on every node of the Qube, return one or more nodes.
    If multiple nodes are returned they each get a copy of the (transformed) children of the original node.
    Any changes to the children of a node will be ignored.
    """

    def transform(node: "Qube") -> "list[Qube]":
        # Children are transformed before func sees the node. They are
        # collected as a tuple because Qube.children is a tuple field and
        # a list there would leave the resulting node unhashable.
        children = tuple(cc for c in node.children for cc in transform(c))
        new_nodes = func(node)
        if isinstance(new_nodes, Qube):
            new_nodes = [new_nodes]

        return [new_node.replace(children=children) for new_node in new_nodes]

    children = tuple(cc for c in self.children for cc in transform(c))
    return dataclasses.replace(self, children=children)
def select(
    self,
    selection: dict[str, str | list[str]],
    mode: Literal["strict", "relaxed"] = "relaxed",
    prune=True,
) -> "Qube":
    """Return the part of the qube matching ``selection``.

    Nodes whose key is in ``selection`` keep only the requested values and
    are dropped when none match. Nodes whose key is not in ``selection``
    are dropped in "strict" mode; in "relaxed" mode they are kept, unless
    ``prune`` is true and they had children that were all removed.
    """
    # Normalise every requested value to a list.
    wanted = {}
    for key, value in selection.items():
        wanted[key] = value if isinstance(value, list) else [value]

    def surviving(nodes):
        results = (visit(n) for n in nodes)
        return tuple(r for r in results if r is not None)

    def visit(node: "Qube") -> "Qube | None":
        if node.key in wanted:
            values = QEnum((c for c in wanted[node.key] if c in node.values))
            if not values:
                return None
            data = dataclasses.replace(node.data, values=values)
            return dataclasses.replace(
                node, data=data, children=surviving(node.children)
            )

        if mode == "strict":
            return None

        new_children = surviving(node.children)
        # A non-leaf node that lost all of its children is removed when pruning.
        if prune and node.children and not new_children:
            return None
        return dataclasses.replace(node, children=new_children)

    return dataclasses.replace(self, children=surviving(self.children))
def span(self, key: str) -> list[str]:
    """
    Search the whole tree for any value that a given key takes anywhere.
    """
    found = set()
    if self.key == key:
        found.update(self.values)
    for child in self.children:
        found.update(child.span(key))
    return sorted(found)
def axes(self) -> dict[str, set[str]]: def axes(self) -> dict[str, set[str]]:
""" """
Return a dictionary of all the spans of the keys in the qube. Return a dictionary of all the spans of the keys in the qube.
@ -279,8 +317,11 @@ class Qube:
This hash takes into account the key, values and children's key values recursively. This hash takes into account the key, values and children's key values recursively.
Because nodes are immutable, we only need to compute this once. Because nodes are immutable, we only need to compute this once.
""" """
def hash_node(node: Qube) -> int: def hash_node(node: Qube) -> int:
return hash((node.key, node.values, tuple(c.structural_hash for c in node.children))) return hash(
(node.key, node.values, tuple(c.structural_hash for c in node.children))
)
return hash_node(self) return hash_node(self)
@ -292,4 +333,4 @@ class Qube:
new_children = set_operations.compress_children(new_children) new_children = set_operations.compress_children(new_children)
# Return the now compressed node # Return the now compressed node
return Qube.make(self.key, self.values, new_children) return Qube.make(self.key, self.values, new_children)

View File

@ -1 +1,3 @@
from .Qube import Qube from .Qube import Qube
__all__ = ["Qube"]

View File

@ -10,23 +10,29 @@ console = Console(stderr=True)
def main(): def main():
parser = argparse.ArgumentParser(description="Generate a compressed tree from various inputs.") parser = argparse.ArgumentParser(
description="Generate a compressed tree from various inputs."
)
subparsers = parser.add_subparsers(title="subcommands", required=True) subparsers = parser.add_subparsers(title="subcommands", required=True)
parser_convert = subparsers.add_parser('convert', help='Convert trees from one format to another.') parser_convert = subparsers.add_parser(
parser_another = subparsers.add_parser('another_subcommand', help='Does something else') "convert", help="Convert trees from one format to another."
)
# parser_another = subparsers.add_parser(
# "another_subcommand", help="Does something else"
# )
parser_convert.add_argument( parser_convert.add_argument(
"--input", "--input",
type=argparse.FileType("r"), type=argparse.FileType("r"),
default=sys.stdin, default=sys.stdin,
help="Specify the input file (default: standard input)." help="Specify the input file (default: standard input).",
) )
parser_convert.add_argument( parser_convert.add_argument(
"--output", "--output",
type=argparse.FileType("w"), type=argparse.FileType("w"),
default=sys.stdout, default=sys.stdout,
help="Specify the output file (default: standard output)." help="Specify the output file (default: standard output).",
) )
parser_convert.add_argument( parser_convert.add_argument(
@ -36,25 +42,26 @@ def main():
help="""Specify the input format: help="""Specify the input format:
fdb: the output of fdb list --porcelain fdb: the output of fdb list --porcelain
mars: the output of mars list mars: the output of mars list
""" """,
) )
parser_convert.add_argument( parser_convert.add_argument(
"--output_format", "--output_format",
choices=["text", "html"], choices=["text", "html"],
default="text", default="text",
help="Specify the output format (text or html)." help="Specify the output format (text or html).",
) )
parser_convert.set_defaults(func=convert) parser_convert.set_defaults(func=convert)
args = parser.parse_args() args = parser.parse_args()
args.func(args) args.func(args)
def convert(args): def convert(args):
q = Qube.empty() q = Qube.empty()
for datacube in parse_fdb_list(args.input): for datacube in parse_fdb_list(args.input):
new_branch = Qube.from_datacube(datacube) new_branch = Qube.from_datacube(datacube)
q = (q | Qube.from_datacube(datacube)) q = q | Qube.from_datacube(datacube)
# output = match args.output_format: # output = match args.output_format:
# case "text": # case "text":
@ -71,5 +78,6 @@ def convert(args):
console.print(locals()) console.print(locals())
console.print("FOO", style="white on blue") console.print("FOO", style="white on blue")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -1,16 +1,20 @@
def parse_key_value_pairs(text: str):
    """Parse ``{k1=a,k2=b/c}{k3=d}`` style text into ``{key: [values]}``.

    Brace groups are flattened into one comma-separated list, each entry is
    split at its first "=", and the value part is split on "/". Empty
    entries (blank input, trailing comma) are skipped.

    Raises:
        ValueError: if a non-empty entry has no "=".
    """
    # "}{" separates consecutive brace groups; treat it like a comma, then
    # drop the remaining outer braces.
    flattened = text.replace("}{", ",").replace("{", "").replace("}", "").strip()

    result = {}
    for segment in flattened.split(","):
        if not segment.strip():
            continue
        key, separator, values = segment.partition("=")
        if not separator:
            # Fail with a message naming the bad entry rather than printing
            # it to stdout and crashing on tuple unpacking.
            raise ValueError(f"Expected 'key=value' but got {segment!r}")
        result[key] = values.split("/")
    return result
def parse_fdb_list(f): def parse_fdb_list(f):
for line in f.readlines(): for line in f.readlines():
# Handle fdb list normal # Handle fdb list normal
@ -20,4 +24,4 @@ def parse_fdb_list(f):
# handle fdb list --compact # handle fdb list --compact
if line.startswith("retrieve,") and not line.startswith("retrieve,\n"): if line.startswith("retrieve,") and not line.startswith("retrieve,\n"):
line = line[9:] line = line[9:]
yield parse_key_value_pairs(line) yield parse_key_value_pairs(line)

View File

@ -10,13 +10,17 @@ from .value_types import Values
class NodeData: class NodeData:
key: str key: str
values: Values values: Values
metadata: dict[str, tuple[Hashable, ...]] = field(default_factory=frozendict, compare=False) metadata: dict[str, tuple[Hashable, ...]] = field(
default_factory=frozendict, compare=False
)
def summary(self) -> str: def summary(self) -> str:
return f"{self.key}={self.values.summary()}" if self.key != "root" else "root" return f"{self.key}={self.values.summary()}" if self.key != "root" else "root"
@dataclass(frozen=False, eq=True, order=True)
class RootNodeData(NodeData):
    """NodeData variant whose summary is the bare key, so a root can print under a custom name."""

    def summary(self) -> str:
        # Unlike NodeData.summary, no "key=values" formatting is applied.
        label = self.key
        return label

View File

@ -20,16 +20,31 @@ class SetOperation(Enum):
DIFFERENCE = (1, 0, 0) DIFFERENCE = (1, 0, 0)
SYMMETRIC_DIFFERENCE = (1, 0, 1) SYMMETRIC_DIFFERENCE = (1, 0, 1)
def fused_set_operations(
    A: "Values", B: "Values"
) -> tuple[list[Values], list[Values], list[Values]]:
    """
    Compute "only in A", "in both" and "only in B" for two value sets at once.

    Returns:
        Three single-element lists, each holding one QEnum:
        ``([A - B], [A & B], [B - A])``.

    Raises:
        NotImplementedError: If either argument is not a QEnum.
    """
    # Guard clause: only QEnum against QEnum is supported so far
    if not (isinstance(A, QEnum) and isinstance(B, QEnum)):
        raise NotImplementedError(
            "Fused set operations on values types other than QEnum are not yet implemented"
        )

    members_a = set(A)
    members_b = set(B)
    shared = members_a & members_b
    only_a = members_a - shared
    only_b = members_b - shared
    return [QEnum(only_a)], [QEnum(shared)], [QEnum(only_b)]
def node_intersection(A: "Values", B: "Values") -> tuple[Values, Values, Values]: def node_intersection(A: "Values", B: "Values") -> tuple[Values, Values, Values]:
if isinstance(A, QEnum) and isinstance(B, QEnum): if isinstance(A, QEnum) and isinstance(B, QEnum):
@ -38,18 +53,24 @@ def node_intersection(A: "Values", B: "Values") -> tuple[Values, Values, Values]
just_A = set_A - intersection just_A = set_A - intersection
just_B = set_B - intersection just_B = set_B - intersection
return QEnum(just_A), QEnum(intersection), QEnum(just_B) return QEnum(just_A), QEnum(intersection), QEnum(just_B)
raise NotImplementedError("Fused set operations on values types other than QEnum are not yet implemented")
def operation(A: "Qube", B : "Qube", operation_type: SetOperation, node_type) -> "Qube": raise NotImplementedError(
assert A.key == B.key, "The two Qube root nodes must have the same key to perform set operations," \ "Fused set operations on values types other than QEnum are not yet implemented"
f"would usually be two root nodes. They have {A.key} and {B.key} respectively" )
assert A.values == B.values, f"The two Qube root nodes must have the same values to perform set operations {A.values = }, {B.values = }"
def operation(A: "Qube", B: "Qube", operation_type: SetOperation, node_type) -> "Qube":
assert A.key == B.key, (
"The two Qube root nodes must have the same key to perform set operations,"
f"would usually be two root nodes. They have {A.key} and {B.key} respectively"
)
assert A.values == B.values, (
f"The two Qube root nodes must have the same values to perform set operations {A.values = }, {B.values = }"
)
# Group the children of the two nodes by key # Group the children of the two nodes by key
nodes_by_key = defaultdict(lambda : ([], [])) nodes_by_key = defaultdict(lambda: ([], []))
for node in A.children: for node in A.children:
nodes_by_key[node.key][0].append(node) nodes_by_key[node.key][0].append(node)
for node in B.children: for node in B.children:
@ -59,7 +80,9 @@ def operation(A: "Qube", B : "Qube", operation_type: SetOperation, node_type) ->
# For every node group, perform the set operation # For every node group, perform the set operation
for key, (A_nodes, B_nodes) in nodes_by_key.items(): for key, (A_nodes, B_nodes) in nodes_by_key.items():
new_children.extend(_operation(key, A_nodes, B_nodes, operation_type, node_type)) new_children.extend(
_operation(key, A_nodes, B_nodes, operation_type, node_type)
)
# Whenever we modify children we should recompress them # Whenever we modify children we should recompress them
# But since `operation` is already recursive, we only need to compress this level not all levels # But since `operation` is already recursive, we only need to compress this level not all levels
@ -68,10 +91,12 @@ def operation(A: "Qube", B : "Qube", operation_type: SetOperation, node_type) ->
# The values and key are the same so we just replace the children # The values and key are the same so we just replace the children
return replace(A, children=new_children) return replace(A, children=new_children)
# The root node is special so we need a helper method that we can recurse on # The root node is special so we need a helper method that we can recurse on
def _operation(key: str, A: list["Qube"], B : list["Qube"], operation_type: SetOperation, node_type) -> Iterable["Qube"]: def _operation(
key: str, A: list["Qube"], B: list["Qube"], operation_type: SetOperation, node_type
) -> Iterable["Qube"]:
# We need to deal with the case where only one of the trees has this key. # We need to deal with the case where only one of the trees has this key.
# To do so we can insert a dummy node with no children and no values into both A and B # To do so we can insert a dummy node with no children and no values into both A and B
keep_just_A, keep_intersection, keep_just_B = operation_type.value keep_just_A, keep_intersection, keep_just_B = operation_type.value
@ -83,12 +108,11 @@ def _operation(key: str, A: list["Qube"], B : list["Qube"], operation_type: SetO
for node_a in A: for node_a in A:
for node_b in B: for node_b in B:
# Compute A - B, A & B, B - A # Compute A - B, A & B, B - A
# Update the values for the two source nodes to remove the intersection # Update the values for the two source nodes to remove the intersection
just_a, intersection, just_b = node_intersection( just_a, intersection, just_b = node_intersection(
values[node_a], values[node_a],
values[node_b], values[node_b],
) )
# Remove the intersection from the source nodes # Remove the intersection from the source nodes
@ -97,11 +121,14 @@ def _operation(key: str, A: list["Qube"], B : list["Qube"], operation_type: SetO
if keep_intersection: if keep_intersection:
if intersection: if intersection:
new_node_a = replace(node_a, data = replace(node_a.data, values = intersection)) new_node_a = replace(
new_node_b = replace(node_b, data= replace(node_b.data, values = intersection)) node_a, data=replace(node_a.data, values=intersection)
)
new_node_b = replace(
node_b, data=replace(node_b.data, values=intersection)
)
yield operation(new_node_a, new_node_b, operation_type, node_type) yield operation(new_node_a, new_node_b, operation_type, node_type)
# Now we've removed all the intersections we can yield the just_A and just_B parts if needed # Now we've removed all the intersections we can yield the just_A and just_B parts if needed
if keep_just_A: if keep_just_A:
for node in A: for node in A:
@ -112,6 +139,7 @@ def _operation(key: str, A: list["Qube"], B : list["Qube"], operation_type: SetO
if values[node]: if values[node]:
yield node_type.make(key, values[node], node.children) yield node_type.make(key, values[node], node.children)
def compress_children(children: Iterable["Qube"]) -> tuple["Qube"]: def compress_children(children: Iterable["Qube"]) -> tuple["Qube"]:
""" """
Helper method tht only compresses a set of nodes, and doesn't do it recursively. Helper method tht only compresses a set of nodes, and doesn't do it recursively.
@ -124,8 +152,8 @@ def compress_children(children: Iterable["Qube"]) -> tuple["Qube"]:
# only care about the key and children of each node, ignore values # only care about the key and children of each node, ignore values
key = hash((child.key, tuple((cc.structural_hash for cc in child.children)))) key = hash((child.key, tuple((cc.structural_hash for cc in child.children))))
identical_children[key].add(child) identical_children[key].add(child)
# Now go through and create new compressed nodes for any groups that need collapsing # Now go through and create new compressed nodes for any groups that need collapsing
new_children = [] new_children = []
for child_set in identical_children.values(): for child_set in identical_children.values():
if len(child_set) > 1: if len(child_set) > 1:
@ -134,19 +162,23 @@ def compress_children(children: Iterable["Qube"]) -> tuple["Qube"]:
key = child_set[0].key key = child_set[0].key
# Compress the children into a single node # Compress the children into a single node
assert all(isinstance(child.data.values, QEnum) for child in child_set), "All children must have QEnum values" assert all(isinstance(child.data.values, QEnum) for child in child_set), (
"All children must have QEnum values"
node_data = NodeData(
key = key,
metadata = frozendict(), # Todo: Implement metadata compression
values = QEnum((v for child in child_set for v in child.data.values.values)),
) )
new_child = node_type(data = node_data, children = child_set[0].children)
node_data = NodeData(
key=key,
metadata=frozendict(), # Todo: Implement metadata compression
values=QEnum(
(v for child in child_set for v in child.data.values.values)
),
)
new_child = node_type(data=node_data, children=child_set[0].children)
else: else:
# If the group is size one just keep it # If the group is size one just keep it
new_child = child_set.pop() new_child = child_set.pop()
new_children.append(new_child) new_children.append(new_child)
return tuple(sorted(new_children, return tuple(
key = lambda n : ((n.key, tuple(sorted(n.values.values)))) sorted(new_children, key=lambda n: ((n.key, tuple(sorted(n.values.values)))))
)) )

View File

@ -6,31 +6,38 @@ from typing import Iterable, Protocol, Sequence, runtime_checkable
@runtime_checkable
class TreeLike(Protocol):
    """Structural type for objects that can be rendered as a tree."""

    @property
    def children(self) -> Sequence["TreeLike"]:
        """The child nodes; must support indexing like node.children[i]."""
        ...

    def summary(self) -> str:
        """Return the text used to label this node."""
        ...
@dataclass(frozen=True)
class HTML:
    """Immutable wrapper around a string of HTML markup."""

    html: str  # the markup, returned unchanged by _repr_html_

    def _repr_html_(self) -> str:
        # NOTE(review): the method name matches the IPython/Jupyter rich
        # display hook - presumably that is the intended consumer.
        markup = self.html
        return markup
def summarize_node(node: TreeLike, collapse = False, **kwargs) -> tuple[str, str, TreeLike]:
def summarize_node(
node: TreeLike, collapse=False, **kwargs
) -> tuple[str, str, TreeLike]:
""" """
Extracts a summarized representation of the node while collapsing single-child paths. Extracts a summarized representation of the node while collapsing single-child paths.
Returns the summary string and the last node in the chain that has multiple children. Returns the summary string and the last node in the chain that has multiple children.
""" """
summaries = [] summaries = []
paths = [] paths = []
while True: while True:
summary = node.summary(**kwargs) summary = node.summary(**kwargs)
paths.append(summary) paths.append(summary)
if len(summary) > 50: if len(summary) > 50:
summary = summary[:50] + "..." summary = summary[:50] + "..."
summaries.append(summary) summaries.append(summary)
if not collapse: if not collapse:
break break
# Move down if there's exactly one child, otherwise stop # Move down if there's exactly one child, otherwise stop
@ -40,29 +47,35 @@ def summarize_node(node: TreeLike, collapse = False, **kwargs) -> tuple[str, str
return ", ".join(summaries), ",".join(paths), node return ", ".join(summaries), ",".join(paths), node
def node_tree_to_string(node: TreeLike, prefix: str = "", depth=None) -> Iterable[str]:
    """
    Yield the fragments of a plain-text drawing of the tree rooted at `node`.

    Args:
        node: The root of the (sub)tree to draw.
        prefix: Text emitted before the connector of every child line.
        depth: Number of levels still allowed, or None for no limit. Once it
            is 0 or less the node is printed followed by " - ..." and its
            children are omitted.

    Yields:
        String fragments; concatenating them gives the full drawing.
    """
    summary, path, node = summarize_node(node)

    # Depth budget used up: print a truncation marker instead of the children
    if depth is not None and depth <= 0:
        yield summary + " - ...\n"
        return

    children = node.children

    # Special case for nodes with only a single child, this keeps the child on
    # the same line. The depth is passed on unchanged here.
    if len(children) == 1:
        yield summary + ", "
        yield from node_tree_to_string(children[0], prefix, depth=depth)
        return

    yield summary + "\n"
    remaining_depth = None if depth is None else depth - 1
    last_index = len(children) - 1
    for index, child in enumerate(children):
        is_last = index == last_index
        yield prefix + ("└── " if is_last else "├── ")
        # NOTE(review): the extension strings are kept exactly as they appear
        # in the reviewed text - confirm they were not whitespace-collapsed.
        extension = " " if is_last else ""
        yield from node_tree_to_string(child, prefix + extension, depth=remaining_depth)
def _node_tree_to_html(node : TreeLike, prefix : str = "", depth = 1, connector = "", **kwargs) -> Iterable[str]:
def _node_tree_to_html(
node: TreeLike, prefix: str = "", depth=1, connector="", **kwargs
) -> Iterable[str]:
summary, path, node = summarize_node(node, **kwargs) summary, path, node = summarize_node(node, **kwargs)
if len(node.children) == 0: if len(node.children) == 0:
yield f'<span class="qubed-node leaf" data-path="{path}">{connector}{summary}</span>' yield f'<span class="qubed-node leaf" data-path="{path}">{connector}{summary}</span>'
return return
@ -73,15 +86,22 @@ def _node_tree_to_html(node : TreeLike, prefix : str = "", depth = 1, connector
for index, child in enumerate(node.children): for index, child in enumerate(node.children):
connector = "└── " if index == len(node.children) - 1 else "├── " connector = "└── " if index == len(node.children) - 1 else "├── "
extension = " " if index == len(node.children) - 1 else "" extension = " " if index == len(node.children) - 1 else ""
yield from _node_tree_to_html(child, prefix + extension, depth = depth - 1, connector = prefix+connector, **kwargs) yield from _node_tree_to_html(
child,
prefix + extension,
depth=depth - 1,
connector=prefix + connector,
**kwargs,
)
yield "</details>" yield "</details>"
def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
css_id = f"qubed-tree-{random.randint(0, 1000000)}" def node_tree_to_html(node: TreeLike, depth=1, **kwargs) -> str:
css_id = f"qubed-tree-{random.randint(0, 1000000)}"
# It's ugle to use an f string here because css uses {} so much so instead
# we use CSS_ID as a placeholder and replace it later # It's ugle to use an f string here because css uses {} so much so instead
css = """ # we use CSS_ID as a placeholder and replace it later
css = """
<style> <style>
pre#CSS_ID { pre#CSS_ID {
font-family: monospace; font-family: monospace;
@ -89,7 +109,7 @@ def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
font-family: SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace; font-family: SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;
font-size: 12px; font-size: 12px;
line-height: 1.4; line-height: 1.4;
details { details {
margin-left: 0; margin-left: 0;
} }
@ -128,7 +148,7 @@ def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
} }
summary::-webkit-details-marker { summary::-webkit-details-marker {
display: none; display: none;
content: ""; content: "";
} }
@ -136,8 +156,8 @@ def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
</style> </style>
""".replace("CSS_ID", css_id) """.replace("CSS_ID", css_id)
# This js snippet copies the path of a node to the clipboard when clicked # This js snippet copies the path of a node to the clipboard when clicked
js = """ js = """
<script type="module" defer> <script type="module" defer>
async function nodeOnClick(event) { async function nodeOnClick(event) {
if (!event.altKey) return; if (!event.altKey) return;
@ -159,5 +179,5 @@ def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
nodes.forEach(n => n.addEventListener("click", nodeOnClick)); nodes.forEach(n => n.addEventListener("click", nodeOnClick));
</script> </script>
""".replace("CSS_ID", css_id) """.replace("CSS_ID", css_id)
nodes = "".join(_node_tree_to_html(node=node, depth=depth, **kwargs)) nodes = "".join(_node_tree_to_html(node=node, depth=depth, **kwargs))
return f"{js}{css}<pre class='qubed-tree' id='{css_id}'>{nodes}</pre>" return f"{js}{css}<pre class='qubed-tree' id='{css_id}'>{nodes}</pre>"

View File

@ -2,14 +2,15 @@ from dataclasses import dataclass, field
character = str character = str
@dataclass(unsafe_hash=True)
class TrieNode:
    """
    One node of a character trie, linked to its parent and to its children.

    The generated hash covers `parent` and `parent_char` only: `children` is a
    dict, which is unhashable, so including it made `hash(node)` raise
    TypeError. Equality still compares all three fields.
    """

    # The node one character closer to the root, None for the root itself
    parent: "TrieNode | None"
    # The character on the edge from `parent` to this node
    parent_char: "character"
    # Child nodes keyed by the character that leads to them
    children: "dict[character, TrieNode]" = field(default_factory=dict, hash=False)
@dataclass @dataclass
class Trie: class Trie:
root: TrieNode = field(default_factory=lambda: TrieNode(None, "")) root: TrieNode = field(default_factory=lambda: TrieNode(None, ""))
reverse_lookup: dict[int, TrieNode] = field(default_factory=dict) reverse_lookup: dict[int, TrieNode] = field(default_factory=dict)
@ -28,7 +29,7 @@ class Trie:
self.reverse_lookup[n_id] = node self.reverse_lookup[n_id] = node
return n_id return n_id
def lookup_by_id(self, n_id: int): def lookup_by_id(self, n_id: int):
leaf_node = self.reverse_lookup[n_id] leaf_node = self.reverse_lookup[n_id]
string = [] string = []
@ -37,4 +38,3 @@ class Trie:
leaf_node = leaf_node.parent leaf_node = leaf_node.parent
return "".join(reversed(string)) return "".join(reversed(string))

View File

@ -7,11 +7,13 @@ from typing import TYPE_CHECKING, Any, FrozenSet, Iterable, Literal, TypeVar
if TYPE_CHECKING: if TYPE_CHECKING:
from .Qube import Qube from .Qube import Qube
@dataclass(frozen=True) @dataclass(frozen=True)
class Values(ABC): class Values(ABC):
@abstractmethod @abstractmethod
def summary(self) -> str: def summary(self) -> str:
pass pass
@abstractmethod @abstractmethod
def __len__(self) -> int: def __len__(self) -> int:
pass pass
@ -25,9 +27,9 @@ class Values(ABC):
pass pass
@abstractmethod @abstractmethod
def from_strings(self, values: Iterable[str]) -> list['Values']: def from_strings(self, values: Iterable[str]) -> list["Values"]:
pass pass
@abstractmethod @abstractmethod
def min(self): def min(self):
pass pass
@ -36,19 +38,22 @@ class Values(ABC):
def to_json(self): def to_json(self):
pass pass
T = TypeVar("T") T = TypeVar("T")
EnumValuesType = FrozenSet[T] EnumValuesType = FrozenSet[T]
@dataclass(frozen=True, order=True) @dataclass(frozen=True, order=True)
class QEnum(Values): class QEnum(Values):
""" """
The simplest kind of key value is just a list of strings. The simplest kind of key value is just a list of strings.
summary -> string1/string2/string.... summary -> string1/string2/string....
""" """
values: EnumValuesType values: EnumValuesType
def __init__(self, obj): def __init__(self, obj):
object.__setattr__(self, 'values', frozenset(obj)) object.__setattr__(self, "values", frozenset(obj))
def __post_init__(self): def __post_init__(self):
assert isinstance(self.values, tuple) assert isinstance(self.values, tuple)
@ -58,22 +63,30 @@ class QEnum(Values):
def __len__(self) -> int: def __len__(self) -> int:
return len(self.values) return len(self.values)
def summary(self) -> str: def summary(self) -> str:
return '/'.join(map(str, sorted(self.values))) return "/".join(map(str, sorted(self.values)))
def __contains__(self, value: Any) -> bool: def __contains__(self, value: Any) -> bool:
return value in self.values return value in self.values
def from_strings(self, values: Iterable[str]) -> list['Values']:
def from_strings(self, values: Iterable[str]) -> list["Values"]:
return [type(self)(tuple(values))] return [type(self)(tuple(values))]
def min(self): def min(self):
return min(self.values) return min(self.values)
def to_json(self): def to_json(self):
return list(self.values) return list(self.values)
class DateEnum(QEnum):
    """A QEnum whose values are formatted as YYYYMMDD in the summary."""

    def summary(self) -> str:
        # Sort first so the summary does not depend on set iteration order
        return "/".join(d.strftime("%Y%m%d") for d in sorted(self.values))
@dataclass(frozen=True) @dataclass(frozen=True)
class Range(Values, ABC): class Range(Values, ABC):
@ -85,7 +98,7 @@ class Range(Values, ABC):
def min(self): def min(self):
return self.start return self.start
def __iter__(self) -> Iterable[Any]: def __iter__(self) -> Iterable[Any]:
i = self.start i = self.start
while i <= self.end: while i <= self.end:
@ -95,6 +108,7 @@ class Range(Values, ABC):
def to_json(self): def to_json(self):
return dataclasses.asdict(self) return dataclasses.asdict(self)
@dataclass(frozen=True) @dataclass(frozen=True)
class DateRange(Range): class DateRange(Range):
start: date start: date
@ -116,31 +130,38 @@ class DateRange(Range):
dates = sorted([datetime.strptime(v, "%Y%m%d") for v in values]) dates = sorted([datetime.strptime(v, "%Y%m%d") for v in values])
if len(dates) < 2: if len(dates) < 2:
return [DateEnum(dates)] return [DateEnum(dates)]
ranges = [] ranges = []
current_group, dates = [dates[0],], dates[1:] current_group, dates = (
current_type : Literal["enum", "range"] = "enum" [
dates[0],
],
dates[1:],
)
current_type: Literal["enum", "range"] = "enum"
while len(dates) > 1: while len(dates) > 1:
if current_type == "range": if current_type == "range":
# If the next date fits then add it to the current range # If the next date fits then add it to the current range
if dates[0] - current_group[-1] == timedelta(days=1): if dates[0] - current_group[-1] == timedelta(days=1):
current_group.append(dates.pop(0)) current_group.append(dates.pop(0))
# Emit the current range and start a new one # Emit the current range and start a new one
else: else:
if len(current_group) == 1: if len(current_group) == 1:
ranges.append(DateEnum(current_group)) ranges.append(DateEnum(current_group))
else: else:
ranges.append(DateRange( ranges.append(
start=current_group[0], DateRange(
end=current_group[-1], start=current_group[0],
step=timedelta(days=1) end=current_group[-1],
)) step=timedelta(days=1),
current_group = [dates.pop(0),] )
)
current_group = [
dates.pop(0),
]
current_type = "enum" current_type = "enum"
if current_type == "enum": if current_type == "enum":
# If the next date is one more than the last then switch to range mode # If the next date is one more than the last then switch to range mode
if dates[0] - current_group[-1] == timedelta(days=1): if dates[0] - current_group[-1] == timedelta(days=1):
@ -156,28 +177,35 @@ class DateRange(Range):
# Handle remaining `current_group` # Handle remaining `current_group`
if current_group: if current_group:
if current_type == "range": if current_type == "range":
ranges.append(DateRange( ranges.append(
start=current_group[0], DateRange(
end=current_group[-1], start=current_group[0],
step=timedelta(days=1) end=current_group[-1],
)) step=timedelta(days=1),
)
)
else: else:
ranges.append(DateEnum(current_group)) ranges.append(DateEnum(current_group))
return ranges return ranges
def __contains__(self, value: Any) -> bool:
    """
    Return True if `value`, a date string in YYYYMMDD format, lies on the range.

    A date is on the range when it is between `start` and `end` (both
    inclusive) and a whole number of steps away from `start`.

    Raises:
        ValueError: If `value` does not match the YYYYMMDD format.
    """
    v = datetime.strptime(value, "%Y%m%d").date()
    if not (self.start <= v <= self.end):
        return False

    no_time = timedelta(0)
    if self.step == no_time:
        # A zero step describes a single date; the modulo below would raise
        # ZeroDivisionError for it.
        return v == self.start

    # timedelta % timedelta is a timedelta, so it must be compared with a
    # timedelta: comparing it with the int 0 is always False.
    return (v - self.start) % self.step == no_time
def summary(self) -> str: def summary(self) -> str:
def fmt(d): return d.strftime("%Y%m%d") def fmt(d):
return d.strftime("%Y%m%d")
if self.step == timedelta(days=0): if self.step == timedelta(days=0):
return f"{fmt(self.start)}" return f"{fmt(self.start)}"
if self.step == timedelta(days=1): if self.step == timedelta(days=1):
return f"{fmt(self.start)}/to/{fmt(self.end)}" return f"{fmt(self.start)}/to/{fmt(self.end)}"
return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step // timedelta(days=1)}" return (
f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step // timedelta(days=1)}"
)
@dataclass(frozen=True) @dataclass(frozen=True)
class TimeRange(Range): class TimeRange(Range):
@ -188,55 +216,60 @@ class TimeRange(Range):
def min(self): def min(self):
return self.start return self.start
def __iter__(self) -> Iterable[Any]: def __iter__(self) -> Iterable[Any]:
return super().__iter__() return super().__iter__()
@classmethod
def from_strings(cls, values: Iterable[str]) -> list["TimeRange"]:
    """
    Group integer-like strings into ranges of consecutive values.

    The values are converted with int() and sorted. Every run in which each
    value is exactly one more than the previous becomes one range with step 1;
    a value with no such neighbour becomes a range with start == end.

    Args:
        values: Strings that int() can parse.

    Returns:
        The ranges in ascending order, covering every input value. Empty
        input gives an empty list.

    Raises:
        ValueError: If one of the strings is not an integer.
    """
    times = sorted(int(v) for v in values)
    if not times:
        return []

    if len(times) < 2:
        # NOTE(review): a lone input value gets step=100 while isolated values
        # in longer inputs get step=0 (see below). Both are kept as they were;
        # confirm which one is intended.
        return [cls(start=times[0], end=times[0], step=100)]

    # Split the sorted values into runs of consecutive integers. Iterating
    # over every value and keeping the final run fixes the data loss of the
    # previous `while len(times) > 1` loop, which never emitted the last run.
    runs = [[times[0]]]
    for t in times[1:]:
        if t - runs[-1][-1] == 1:
            runs[-1].append(t)
        else:
            runs.append([t])

    return [
        cls(start=run[0], end=run[-1], step=1 if len(run) > 1 else 0)
        for run in runs
    ]
def __len__(self) -> int:
    """
    Return how many values the range contains, counting both end points.

    `start` and `end` are both part of the range (membership tests use
    ``start <= v <= end``), hence the ``+ 1``.
    """
    if self.step == 0:
        # from_strings uses step 0 for a single value; dividing by it would
        # raise ZeroDivisionError.
        return 1
    return (self.end - self.start) // self.step + 1
def summary(self) -> str: def summary(self) -> str:
def fmt(d): return f"{d:04d}" def fmt(d):
return f"{d:04d}"
if self.step == 0: if self.step == 0:
return f"{fmt(self.start)}" return f"{fmt(self.start)}"
return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}" return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
def __contains__(self, value: Any) -> bool:
    """
    Return True if int(value) lies on the range.

    A value is on the range when it is between `start` and `end` (both
    inclusive) and a whole number of steps away from `start`.

    Raises:
        ValueError: If `value` cannot be converted with int().
    """
    v = int(value)
    if not (self.start <= v <= self.end):
        return False
    if self.step == 0:
        # from_strings uses step 0 for a single value; the modulo below would
        # raise ZeroDivisionError for it.
        return v == self.start
    return (v - self.start) % self.step == 0
@dataclass(frozen=True) @dataclass(frozen=True)
class IntRange(Range): class IntRange(Range):
start: int start: int
@ -246,70 +279,80 @@ class IntRange(Range):
def __len__(self) -> int:
    """
    Return how many integers the range contains, counting both end points.

    `start` and `end` are both part of the range (membership tests use
    ``start <= v <= end``), hence the ``+ 1``.
    """
    if self.step == 0:
        # from_strings uses step 0 for a single value; dividing by it would
        # raise ZeroDivisionError.
        return 1
    return (self.end - self.start) // self.step + 1
def summary(self) -> str: def summary(self) -> str:
def fmt(d): return d def fmt(d):
return d
if self.step == 0: if self.step == 0:
return f"{fmt(self.start)}" return f"{fmt(self.start)}"
return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}" return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
def __contains__(self, value: Any) -> bool:
    """
    Return True if int(value) lies on the range.

    A value is on the range when it is between `start` and `end` (both
    inclusive) and a whole number of steps away from `start`.

    Raises:
        ValueError: If `value` cannot be converted with int().
    """
    v = int(value)
    if not (self.start <= v <= self.end):
        return False
    if self.step == 0:
        # from_strings uses step 0 for a single value; the modulo below would
        # raise ZeroDivisionError for it.
        return v == self.start
    return (v - self.start) % self.step == 0
@classmethod
def from_strings(cls, values: Iterable[str]) -> list["IntRange"]:
    """
    Group integer strings into ranges of consecutive values.

    The values are converted with int() and sorted. Every run in which each
    value is exactly one more than the previous becomes one range with step 1;
    a value with no such neighbour becomes a range with start == end and
    step 0.

    Args:
        values: Strings that int() can parse.

    Returns:
        The ranges in ascending order, covering every input value. Empty
        input gives an empty list.

    Raises:
        ValueError: If one of the strings is not an integer.
    """
    ints = sorted(int(v) for v in values)
    if not ints:
        return []

    # Split the sorted values into runs of consecutive integers. Iterating
    # over every value and keeping the final run fixes the data loss of the
    # previous `while len(ints) > 1` loop, which never emitted the last run.
    runs = [[ints[0]]]
    for i in ints[1:]:
        if i - runs[-1][-1] == 1:
            runs[-1].append(i)
        else:
            runs.append([i])

    return [
        cls(start=run[0], end=run[-1], step=1 if len(run) > 1 else 0)
        for run in runs
    ]
def values_from_json(obj) -> Values:
    """
    Build a Values object from decoded JSON.

    A list becomes a QEnum. Anything else is indexed with "dtype" and its
    items are passed as keyword arguments to the matching range class.

    Raises:
        ValueError: If the dtype is not "date", "time" or "int".
    """
    if isinstance(obj, list):
        return QEnum(tuple(obj))

    dtype = obj["dtype"]
    if dtype == "date":
        return DateRange(**obj)
    if dtype == "time":
        return TimeRange(**obj)
    if dtype == "int":
        return IntRange(**obj)
    raise ValueError(f"Unknown dtype {obj['dtype']}")
def convert_datatypes(q: "Qube", conversions: dict[str, Values]) -> "Qube":
    """
    Re-express the values of selected keys using other Values types.

    Args:
        q: The Qube to convert; the traversal is delegated to `q.transform`.
        conversions: Maps a key to the object whose `from_strings` turns that
            key's values into one or more new Values groups.

    Returns:
        Whatever `q.transform` returns for the per-node conversion.
    """

    def _convert(node: "Qube") -> Iterable["Qube"]:
        # Keys without a conversion pass through untouched
        if node.key not in conversions:
            yield node
            return

        data_type = conversions[node.key]
        assert isinstance(node.values, QEnum), (
            "Only QEnum values can be converted to other datatypes."
        )
        # One copy of the node per group that from_strings returns
        for values_group in data_type.from_strings(node.values):
            converted_data = replace(node.data, values=values_group)
            yield replace(node, data=converted_data)

    return q.transform(_convert)

View File

@ -20,7 +20,8 @@ app.add_middleware(
allow_headers=["*"], allow_headers=["*"],
) )
@app.get("/favicon.ico", include_in_schema=False)
async def favicon():
    """Answer GET /favicon.ico with the favicon.ico file (path is relative)."""
    icon_path = "favicon.ico"
    return FileResponse(icon_path)
@ -32,16 +33,15 @@ if "LOCAL_CACHE" in os.environ:
with open(base / "compressed_tree.json", "r") as f: with open(base / "compressed_tree.json", "r") as f:
json_tree = f.read() json_tree = f.read()
with open(base / "language.yaml", "r") as f: with open(base / "language.yaml", "r") as f:
mars_language = yaml.safe_load(f)["_field"] mars_language = yaml.safe_load(f)["_field"]
else: else:
print("Getting cache from redis") print("Getting cache from redis")
r = redis.Redis(host="redis", port=6379, db=0) r = redis.Redis(host="redis", port=6379, db=0)
json_tree = r.get('compressed_catalog') json_tree = r.get("compressed_catalog")
assert json_tree, "No compressed tree found in redis" assert json_tree, "No compressed tree found in redis"
mars_language = json.loads(r.get('mars_language')) mars_language = json.loads(r.get("mars_language"))
print("Loading tree from json") print("Loading tree from json")
c_tree = CompressedTree.from_json(json.loads(json_tree)) c_tree = CompressedTree.from_json(json.loads(json_tree))
@ -51,6 +51,7 @@ tree = c_tree.reconstruct_compressed_ecmwf_style()
print("Ready to serve requests!") print("Ready to serve requests!")
def request_to_dict(request: Request) -> Dict[str, Any]: def request_to_dict(request: Request) -> Dict[str, Any]:
# Convert query parameters to dictionary format # Convert query parameters to dictionary format
request_dict = dict(request.query_params) request_dict = dict(request.query_params)
@ -61,8 +62,10 @@ def request_to_dict(request: Request) -> Dict[str, Any]:
return request_dict return request_dict
def match_against_cache(request, tree): def match_against_cache(request, tree):
if not tree: return {"_END_" : {}} if not tree:
return {"_END_": {}}
matches = {} matches = {}
for k, subtree in tree.items(): for k, subtree in tree.items():
if len(k.split("=")) != 2: if len(k.split("=")) != 2:
@ -71,13 +74,20 @@ def match_against_cache(request, tree):
values = set(values.split(",")) values = set(values.split(","))
if key in request: if key in request:
if isinstance(request[key], list): if isinstance(request[key], list):
matching_values = ",".join(request_value for request_value in request[key] if request_value in values) matching_values = ",".join(
request_value
for request_value in request[key]
if request_value in values
)
if matching_values: if matching_values:
matches[f"{key}={matching_values}"] = match_against_cache(request, subtree) matches[f"{key}={matching_values}"] = match_against_cache(
request, subtree
)
elif request[key] in values: elif request[key] in values:
matches[f"{key}={request[key]}"] = match_against_cache(request, subtree) matches[f"{key}={request[key]}"] = match_against_cache(request, subtree)
if not matches: return {k : {} for k in tree.keys()} if not matches:
return {k: {} for k in tree.keys()}
return matches return matches
@ -87,33 +97,46 @@ def max_tree_depth(tree):
return 0 return 0
return 1 + max(max_tree_depth(v) for v in tree.values()) return 1 + max(max_tree_depth(v) for v in tree.values())
def prune_short_branches(tree, depth=None):
    """Return a copy of ``tree`` keeping only the branches of maximal depth.

    Args:
        tree: Nested dict of dicts.
        depth: Depth every surviving branch must reach. When ``None`` it is
            taken to be ``max_tree_depth(tree)``.

    Returns:
        A new nested dict containing only the children whose own maximum depth
        is exactly ``depth - 1``, pruned recursively by the same rule.
    """
    if depth is None:
        depth = max_tree_depth(tree)

    child_depth = depth - 1
    pruned = {}
    for key, subtree in tree.items():
        # Drop any child that bottoms out earlier than the deepest branch.
        if max_tree_depth(subtree) == child_depth:
            pruned[key] = prune_short_branches(subtree, child_depth)
    return pruned
def get_paths_to_leaves(tree):
    """Yield each root-to-leaf path of a nested dict as a list of keys.

    A key whose value is falsy (for example an empty dict) is a leaf. Paths
    are produced depth-first, in dict iteration order.
    """
    for key, subtree in tree.items():
        if subtree:
            for tail in get_paths_to_leaves(subtree):
                yield [key, *tail]
        else:
            yield [key]
def get_leaves(tree):
    """Yield the key of every leaf in a nested dict.

    A key whose value is falsy (for example an empty dict) is a leaf. Leaves
    are produced depth-first, in dict iteration order.
    """
    for key, subtree in tree.items():
        if not subtree:
            yield key
        else:
            # Delegate to the recursive generator instead of re-yielding by hand.
            yield from get_leaves(subtree)
@app.get("/api/tree")
async def get_tree(request: Request):
    """Return the result of matching the request's query parameters against ``c_tree``.

    The query parameters are converted with ``request_to_dict`` and passed to
    ``c_tree.multi_match``; the result is printed for debugging and returned.
    """
    request_dict = request_to_dict(request)
    # Match once and reuse the result for both the debug print and the
    # response, rather than walking the tree twice per request.
    matches = c_tree.multi_match(request_dict)
    print(matches)
    return matches
@app.get("/api/match") @app.get("/api/match")
async def get_match(request: Request): async def get_match(request: Request):
# Convert query parameters to dictionary format # Convert query parameters to dictionary format
@ -122,7 +145,6 @@ async def get_match(request: Request):
# Run the schema matching logic # Run the schema matching logic
match_tree = match_against_cache(request_dict, tree) match_tree = match_against_cache(request_dict, tree)
# Prune the tree to only include branches that are as deep as the deepest match # Prune the tree to only include branches that are as deep as the deepest match
# This means if you don't choose a certain branch at some point # This means if you don't choose a certain branch at some point
# the UI won't keep nagging you to choose a value for that branch # the UI won't keep nagging you to choose a value for that branch
@ -130,18 +152,19 @@ async def get_match(request: Request):
return match_tree return match_tree
@app.get("/api/paths") @app.get("/api/paths")
async def api_paths(request: Request): async def api_paths(request: Request):
request_dict = request_to_dict(request) request_dict = request_to_dict(request)
match_tree = match_against_cache(request_dict, tree) match_tree = match_against_cache(request_dict, tree)
match_tree = prune_short_branches(match_tree) match_tree = prune_short_branches(match_tree)
paths = get_paths_to_leaves(match_tree) paths = get_paths_to_leaves(match_tree)
# deduplicate leaves based on the key # deduplicate leaves based on the key
by_path = defaultdict(lambda : {"paths" : set(), "values" : set()}) by_path = defaultdict(lambda: {"paths": set(), "values": set()})
for p in paths: for p in paths:
if p[-1] == "_END_": continue if p[-1] == "_END_":
continue
key, values = p[-1].split("=") key, values = p[-1].split("=")
values = values.split(",") values = values.split(",")
path = tuple(p[:-1]) path = tuple(p[:-1])
@ -149,66 +172,75 @@ async def api_paths(request: Request):
by_path[key]["values"].update(values) by_path[key]["values"].update(values)
by_path[key]["paths"].add(tuple(path)) by_path[key]["paths"].add(tuple(path))
return [{ return [
{
"paths": list(v["paths"]), "paths": list(v["paths"]),
"key": key, "key": key,
"values": sorted(v["values"], reverse=True), "values": sorted(v["values"], reverse=True),
} for key, v in by_path.items()] }
for key, v in by_path.items()
]
@app.get("/api/stac") @app.get("/api/stac")
async def get_STAC(request: Request): async def get_STAC(request: Request):
request_dict = request_to_dict(request) request_dict = request_to_dict(request)
paths = await api_paths(request) paths = await api_paths(request)
def make_link(key_name, paths, values): def make_link(key_name, paths, values):
"""Take a MARS Key and information about which paths matched up to this point and use it to make a STAC Link""" """Take a MARS Key and information about which paths matched up to this point and use it to make a STAC Link"""
path = paths[0] path = paths[0]
href_template = f"/stac?{'&'.join(path)}{'&' if path else ''}{key_name}={{}}" href_template = f"/stac?{'&'.join(path)}{'&' if path else ''}{key_name}={{}}"
optional = [False] optional = [False]
optional_str = "Yes" if all(optional) and len(optional) > 0 else ("Sometimes" if any(optional) else "No") # optional_str = (
# "Yes"
# if all(optional) and len(optional) > 0
# else ("Sometimes" if any(optional) else "No")
# )
values_from_mars_language = mars_language.get(key_name, {}).get("values", []) values_from_mars_language = mars_language.get(key_name, {}).get("values", [])
# values = [v[0] if isinstance(v, list) else v for v in values_from_mars_language] # values = [v[0] if isinstance(v, list) else v for v in values_from_mars_language]
if all(isinstance(v, list) for v in values_from_mars_language): if all(isinstance(v, list) for v in values_from_mars_language):
value_descriptions_dict = {k : v[-1] value_descriptions_dict = {
for v in values_from_mars_language k: v[-1]
if len(v) > 1 for v in values_from_mars_language
for k in v[:-1]} if len(v) > 1
for k in v[:-1]
}
value_descriptions = [value_descriptions_dict.get(v, "") for v in values] value_descriptions = [value_descriptions_dict.get(v, "") for v in values]
if not any(value_descriptions): value_descriptions = None if not any(value_descriptions):
value_descriptions = None
return { return {
"title": key_name, "title": key_name,
"generalized_datacube:href_template": href_template, "generalized_datacube:href_template": href_template,
"rel": "child", "rel": "child",
"type": "application/json", "type": "application/json",
"generalized_datacube:dimension" : { "generalized_datacube:dimension": {
"type" : mars_language.get(key_name, {}).get("type", ""), "type": mars_language.get(key_name, {}).get("type", ""),
"description": mars_language.get(key_name, {}).get("description", ""), "description": mars_language.get(key_name, {}).get("description", ""),
"values" : values, "values": values,
"value_descriptions" : value_descriptions, "value_descriptions": value_descriptions,
"optional" : any(optional), "optional": any(optional),
"multiple": True, "multiple": True,
"paths" : paths, "paths": paths,
} },
}
}
def value_descriptions(key, values): def value_descriptions(key, values):
return { return {
v[0] : v[-1] for v in mars_language.get(key, {}).get("values", []) v[0]: v[-1]
for v in mars_language.get(key, {}).get("values", [])
if len(v) > 1 and v[0] in list(values) if len(v) > 1 and v[0] in list(values)
} }
descriptions = { descriptions = {
key : { key: {
"key" : key, "key": key,
"values" : values, "values": values,
"description" : mars_language.get(key, {}).get("description", ""), "description": mars_language.get(key, {}).get("description", ""),
"value_descriptions" : value_descriptions(key,values), "value_descriptions": value_descriptions(key, values),
} }
for key, values in request_dict.items() for key, values in request_dict.items()
} }
@ -219,15 +251,12 @@ async def get_STAC(request: Request):
"stac_version": "1.0.0", "stac_version": "1.0.0",
"id": "partial-matches", "id": "partial-matches",
"description": "STAC collection representing potential children of this request", "description": "STAC collection representing potential children of this request",
"links": [ "links": [make_link(p["key"], p["paths"], p["values"]) for p in paths],
make_link(p["key"], p["paths"], p["values"])
for p in paths
],
"debug": { "debug": {
"request": request_dict, "request": request_dict,
"descriptions": descriptions, "descriptions": descriptions,
"paths" : paths, "paths": paths,
} },
} }
return stac_collection return stac_collection

View File

@ -1,3 +1,3 @@
fastapi[standard] fastapi[standard]
pe pe
redis redis

View File

@ -1,3 +1,3 @@
parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
cd "$parent_path" cd "$parent_path"
LOCAL_CACHE=../config/climate-dt fastapi dev ./main.py --port 8124 --reload LOCAL_CACHE=../config/climate-dt fastapi dev ./main.py --port 8124 --reload

View File

@ -7,7 +7,7 @@
- **Extension [Maturity Classification](https://github.com/radiantearth/stac-spec/tree/master/extensions/README.md#extension-maturity):** Proposal - **Extension [Maturity Classification](https://github.com/radiantearth/stac-spec/tree/master/extensions/README.md#extension-maturity):** Proposal
- **Owner**: @TomHodson - **Owner**: @TomHodson
This STAC extension allows for representation of [generalised datacubes][gen_datacubes].
A datacube has a fixed set of dimensions `[a, b, c..]`, each of which has a fixed span `{a: ["temp","rainfall"], b : [1-7], c:[True, False]}` such that we can access data by indexing, i.e. providing a value for each axis, `a="rainfall", b=1, ...`. A generalised datacube allows the dimensions to change during indexing, so choosing `a="rainfall"` might yield a different set of axes from `a="temp"`.
@ -49,7 +49,7 @@ A typical `Catalog` entry with this extension:
"multiple": true, "multiple": true,
} }
"" ""
}, },
], ],
"stac_extensions": [ "stac_extensions": [
@ -136,11 +136,11 @@ A spatial dimension in vertical (z) direction.
| unit | string | The unit of measurement for the data, preferably compliant to [UDUNITS-2](https://ncics.org/portfolio/other-resources/udunits2/) units (singular). | | unit | string | The unit of measurement for the data, preferably compliant to [UDUNITS-2](https://ncics.org/portfolio/other-resources/udunits2/) units (singular). |
| reference_system | string\|number\|object | The spatial reference system for the data, specified as [numerical EPSG code](http://www.epsg-registry.org/), [WKT2 (ISO 19162) string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html) or [PROJJSON object](https://proj.org/specifications/projjson.html). Defaults to EPSG code 4326. | | reference_system | string\|number\|object | The spatial reference system for the data, specified as [numerical EPSG code](http://www.epsg-registry.org/), [WKT2 (ISO 19162) string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html) or [PROJJSON object](https://proj.org/specifications/projjson.html). Defaults to EPSG code 4326. |
A Vertical Spatial Dimension Object MUST specify an `extent` or `values`. It MAY specify both. A Vertical Spatial Dimension Object MUST specify an `extent` or `values`. It MAY specify both.
### Temporal Dimension Object ### Temporal Dimension Object
A temporal dimension based on the ISO 8601 standard. The temporal reference system for the data is expected to be ISO 8601 compliant A temporal dimension based on the ISO 8601 standard. The temporal reference system for the data is expected to be ISO 8601 compliant
(Gregorian calendar / UTC). Data not compliant with ISO 8601 can be represented as an *Additional Dimension Object* with `type` set to `temporal`. (Gregorian calendar / UTC). Data not compliant with ISO 8601 can be represented as an *Additional Dimension Object* with `type` set to `temporal`.
| Field Name | Type | Description | | Field Name | Type | Description |

View File

@ -9,4 +9,4 @@ with data_path.open("r") as f:
compressed_tree = compressed_tree.guess_datatypes() compressed_tree = compressed_tree.guess_datatypes()
compressed_tree.print(depth = 10) compressed_tree.print(depth=10)

View File

@ -5,27 +5,35 @@ from tree_traverser import CompressedTree, RefcountedDict
class CompressedTreeFixed(CompressedTree):
    """``CompressedTree`` subclass with its own JSON loader and reconstruction."""

    @classmethod
    def from_json(cls, data: dict):
        """Build a tree from a dict holding ``"cache"`` and ``"root_hash"`` entries.

        Each cache entry provides a ``"dict"`` of children and a ``"refcount"``.
        For every child ``key -> ref``, if the cache entry that ``ref`` points
        at itself contains ``key``, that inner value is stored; otherwise
        ``ref`` is stored unchanged.
        """
        instance = cls({})
        instance.cache = {}
        raw_cache = data["cache"]

        for node_hash, entry in raw_cache.items():
            children = {}
            for child_key, child_ref in entry["dict"].items():
                referenced = raw_cache[str(child_ref)]["dict"]
                if child_key in referenced:
                    children[child_key] = referenced[child_key]
                else:
                    children[child_key] = child_ref

            node = RefcountedDict(children)
            node.refcount = entry["refcount"]
            instance.cache[int(node_hash)] = node

        instance.root_hash = data["root_hash"]
        instance.tree = instance.cache[instance.root_hash]
        return instance

    def reconstruct(self, max_depth=None) -> dict[str, dict]:
        """Expand the cached tree into ordinary nested dictionaries.

        Args:
            max_depth: When given, nodes deeper than this are returned as ``{}``.
        """

        def expand(node_hash: int, depth: int) -> dict[str, dict]:
            # Stop descending once the requested depth limit is exceeded.
            if max_depth is not None and depth > max_depth:
                return {}
            expanded = {}
            for key, child_hash in self.cache[node_hash].items():
                expanded[key] = expand(child_hash, depth=depth + 1)
            return expanded

        return expand(self.root_hash, depth=0)
data_path = Path("data/compressed_tree_climate_dt.json") data_path = Path("data/compressed_tree_climate_dt.json")
# Print size of file # Print size of file
@ -39,5 +47,6 @@ output_data_path = Path("data/compressed_tree_climate_dt_ecmwf_style.json")
compressed_tree.save(output_data_path) compressed_tree.save(output_data_path)
# Report the size of the written file in MB. True division keeps the fractional
# part that the ".1f" format displays; floor division always printed ".0".
print(
    f"climate dt compressed tree ecmwf style: {output_data_path.stat().st_size / 1e6:.1f} MB"
)

View File

@ -5,15 +5,15 @@ from tqdm import tqdm
from pathlib import Path from pathlib import Path
import json import json
from more_itertools import chunked from more_itertools import chunked
process = psutil.Process() process = psutil.Process()
def massage_request(r):
    """Return a new dict in which every value of request ``r`` is a list.

    Values that are already lists are kept as they are; any other value is
    wrapped in a single-element list.
    """
    as_lists = {}
    for key, value in r.items():
        if isinstance(value, list):
            as_lists[key] = value
        else:
            as_lists[key] = [value]
    return as_lists
if __name__ == "__main__": if __name__ == "__main__":
config = """ config = """
--- ---
type: remote type: remote
@ -24,18 +24,18 @@ store: remote
""" """
request = { request = {
"class": "d1", "class": "d1",
"dataset": "climate-dt", "dataset": "climate-dt",
# "date": "19920420", # "date": "19920420",
} }
data_path = Path("data/compressed_tree_climate_dt.json") data_path = Path("data/compressed_tree_climate_dt.json")
if not data_path.exists(): if not data_path.exists():
compressed_tree = CompressedTree({}) compressed_tree = CompressedTree({})
else: else:
compressed_tree = CompressedTree.load(data_path) compressed_tree = CompressedTree.load(data_path)
fdb = backend.PyFDB(fdb_config = config) fdb = backend.PyFDB(fdb_config=config)
visited_path = Path("data/visited_dates.json") visited_path = Path("data/visited_dates.json")
if not visited_path.exists(): if not visited_path.exists():
@ -46,22 +46,24 @@ store: remote
today = datetime.datetime.today() today = datetime.datetime.today()
start = datetime.datetime.strptime("19920420", "%Y%m%d") start = datetime.datetime.strptime("19920420", "%Y%m%d")
# Every day from `start` up to (but not including) today, as YYYYMMDD strings.
date_list = [
    (start + datetime.timedelta(days=x)).strftime("%Y%m%d")
    for x in range((today - start).days)
]
# visited_dates holds YYYYMMDD strings (they are added from the chunks of
# date_list below and dumped to JSON), so membership has to be tested on the
# formatted string. Comparing the datetime objects never matched, which meant
# already-visited dates were traversed again on every run.
date_list = [d for d in date_list if d not in visited_dates]
for dates in chunked(tqdm(date_list), 5): for dates in chunked(tqdm(date_list), 5):
print(dates[0]) print(dates[0])
print(f"Memory usage: {(process.memory_info().rss)/1e6:.1f} MB") print(f"Memory usage: {(process.memory_info().rss) / 1e6:.1f} MB")
r = request | dict(date = dates) r = request | dict(date=dates)
tree = fdb.traverse_fdb(massage_request(r)) tree = fdb.traverse_fdb(massage_request(r))
compressed_tree.insert_tree(tree) compressed_tree.insert_tree(tree)
compressed_tree.save(data_path) compressed_tree.save(data_path)
for date in dates: for date in dates:
visited_dates.add(date) visited_dates.add(date)
with open(visited_path, "w") as f: with open(visited_path, "w") as f:
json.dump(list(visited_dates), f) json.dump(list(visited_dates), f)
# print(compressed_tree.reconstruct_compressed_ecmwf_style()) # print(compressed_tree.reconstruct_compressed_ecmwf_style())

File diff suppressed because one or more lines are too long

View File

@ -1,113 +1,156 @@
from qubed import Qube from qubed import Qube
# Uncompressed fixture tree shared by the tests in this module:
# class=od has two expvers with params 1-2; class=rd has an extra param=3
# under expver=0001.
d = {
    "class=od": {
        f"expver={expver}": {f"param={param}": {} for param in (1, 2)}
        for expver in ("0001", "0002")
    },
    "class=rd": {
        "expver=0001": {f"param={param}": {} for param in (1, 2, 3)},
        "expver=0002": {f"param={param}": {} for param in (1, 2)},
    },
}
q = Qube.from_dict(d)
def test_eq():
    """Two qubes built from the same dict compare equal."""
    rebuilt = Qube.from_dict(d)
    assert q == rebuilt
def test_getitem():
    """Indexing with a (key, value) pair returns the qube below that node."""
    od_branch = q["class", "od"]
    expected_od = Qube.from_dict(
        {
            "expver=0001": {"param=1": {}, "param=2": {}},
            "expver=0002": {"param=1": {}, "param=2": {}},
        }
    )
    assert od_branch == expected_od

    # Indexing can be chained to descend one level at a time.
    expver_branch = q["class", "od"]["expver", "0001"]
    expected_params = Qube.from_dict({"param=1": {}, "param=2": {}})
    assert expver_branch == expected_params
def test_n_leaves():
    """``n_leaves`` counts every combination of the slash-separated values."""
    tree = {
        "a=1/2/3": {"b=1/2/3": {"c=1/2/3": {}}},
        "a=5": {"b=4": {"c=4": {}}},
    }
    qube = Qube.from_dict(tree)

    # Size is 3*3*3 + 1*1*1 = 27 + 1
    expected = 27 + 1
    assert qube.n_leaves == expected
def test_n_leaves_empty():
    """An empty qube reports zero leaves."""
    empty = Qube.empty()
    assert empty.n_leaves == 0
def test_n_nodes_empty():
    """An empty qube reports zero nodes."""
    empty = Qube.empty()
    assert empty.n_nodes == 0
def test_union():
    """``|`` of two overlapping qubes gives the expected merged tree."""
    left = Qube.from_dict({"a=1/2/3": {"b=1": {}}})
    right = Qube.from_dict({"a=2/3/4": {"b=2": {}}})

    # a=2/3 appears on both sides and so carries both b values; a=1 and a=4
    # each come from one side only.
    expected = Qube.from_dict(
        {
            "a=4": {"b=2": {}},
            "a=1": {"b=1": {}},
            "a=2/3": {"b=1/2": {}},
        }
    )

    assert left | right == expected
def test_union_with_empty():
    """Union with an empty qube leaves the other operand unchanged."""
    qube = Qube.from_dict({"a=1/2/3": {"b=1": {}}})
    assert qube | Qube.empty() == qube
def test_union_2():
    """Union of two datacubes that share the ``climate-dt`` dataset value."""
    first_cube = {
        "class": "d1",
        "dataset": ["climate-dt", "another-value"],
        "generation": ["1", "2", "3"],
    }
    second_cube = {
        "class": "d1",
        "dataset": ["weather-dt", "climate-dt"],
        "generation": ["1", "2", "3", "4"],
    }
    left = Qube.from_datacube(first_cube)
    right = Qube.from_datacube(second_cube)

    expected = Qube.from_dict(
        {
            "class=d1": {
                "dataset=climate-dt/weather-dt": {"generation=1/2/3/4": {}},
                "dataset=another-value": {"generation=1/2/3": {}},
            }
        }
    )

    assert left | right == expected
def test_difference():
    """``-`` removes the values of ``a`` that occur in the right-hand qube."""
    minuend = Qube.from_dict({"a=1/2/3/5": {"b=1": {}}})
    subtrahend = Qube.from_dict({"a=2/3/4": {"b=1": {}}})

    remaining = Qube.from_dict({"a=1/5": {"b=1": {}}})

    assert minuend - subtrahend == remaining
def test_order_independence():
    """Qubes built from the same entries in a different order compare equal."""
    one_order = {
        "a=4": {"b=2": {}},
        "a=1": {"b=2": {}, "b=1": {}},
        "a=2/3": {"b=1/2": {}},
    }
    # Same content, with both the top-level keys and the children of a=1
    # listed in a different order.
    other_order = {
        "a=2/3": {"b=1/2": {}},
        "a=4": {"b=2": {}},
        "a=1": {"b=1": {}, "b=2": {}},
    }

    u = Qube.from_dict(one_order)
    v = Qube.from_dict(other_order)

    assert u == v

View File

@ -2,28 +2,32 @@ from qubed import Qube
def test_smoke():
    """``compress()`` on the uncompressed tree gives the expected compact qube."""
    uncompressed = {
        "class=od": {
            "expver=0001": {"param=1": {}, "param=2": {}},
            "expver=0002": {"param=1": {}, "param=2": {}},
        },
        "class=rd": {
            "expver=0001": {"param=1": {}, "param=2": {}, "param=3": {}},
            "expver=0002": {"param=1": {}, "param=2": {}},
        },
    }

    # root
    # ├── class=od, expver=0001/0002, param=1/2
    # └── class=rd
    #     ├── expver=0001, param=1/2/3
    #     └── expver=0002, param=1/2
    compact = {
        "class=od": {"expver=0001/0002": {"param=1/2": {}}},
        "class=rd": {
            "expver=0001": {"param=1/2/3": {}},
            "expver=0002": {"param=1/2": {}},
        },
    }

    q = Qube.from_dict(uncompressed)
    ct = Qube.from_dict(compact)

    assert q.compress() == ct

View File

@ -2,15 +2,17 @@ from qubed import Qube
def test_json_round_trip():
    """A qube survives ``to_json`` followed by ``from_json`` unchanged."""
    tree = {
        "class=d1": {
            "dataset=climate-dt/weather-dt": {"generation=1/2/3/4": {}},
            "dataset=another-value": {"generation=1/2/3": {}},
        }
    }
    original = Qube.from_dict(tree)

    serialised = original.to_json()
    restored = Qube.from_json(serialised)

    assert restored == original

View File

@ -1,18 +1,18 @@
from qubed import Qube from qubed import Qube
# Uncompressed fixture tree; `q` is its compressed form, whose string and HTML
# renderings are checked by the tests in this module.
d = {
    "class=od": {
        f"expver={expver}": {f"param={param}": {} for param in (1, 2)}
        for expver in ("0001", "0002")
    },
    "class=rd": {
        "expver=0001": {f"param={param}": {} for param in (1, 2, 3)},
        "expver=0002": {f"param={param}": {} for param in (1, 2)},
    },
}
q = Qube.from_dict(d).compress()
as_string= """ as_string = """
root root
class=od, expver=0001/0002, param=1/2 class=od, expver=0001/0002, param=1/2
class=rd class=rd
@ -24,8 +24,10 @@ as_html = """
<details open data-path="root"><summary class="qubed-node">root</summary><span class="qubed-node leaf" data-path="class=od,expver=0001/0002,param=1/2"> class=od, expver=0001/0002, param=1/2</span><details open data-path="class=rd"><summary class="qubed-node"> class=rd</summary><span class="qubed-node leaf" data-path="expver=0001,param=1/2/3"> expver=0001, param=1/2/3</span><span class="qubed-node leaf" data-path="expver=0002,param=1/2"> expver=0002, param=1/2</span></details></details> <details open data-path="root"><summary class="qubed-node">root</summary><span class="qubed-node leaf" data-path="class=od,expver=0001/0002,param=1/2"> class=od, expver=0001/0002, param=1/2</span><details open data-path="class=rd"><summary class="qubed-node"> class=rd</summary><span class="qubed-node leaf" data-path="expver=0001,param=1/2/3"> expver=0001, param=1/2/3</span><span class="qubed-node leaf" data-path="expver=0002,param=1/2"> expver=0002, param=1/2</span></details></details>
""".strip() """.strip()
def test_string():
    """``str(q)``, stripped of surrounding whitespace, equals ``as_string``."""
    rendered = str(q).strip()
    assert rendered == as_string
def test_html():
    """The HTML repr of ``q`` contains the ``as_html`` snippet."""
    rendered = q._repr_html_()
    assert as_html in rendered

View File

@ -3,17 +3,16 @@ from qubed import Qube
def test_iter_leaves_simple():
    """``leaves()`` yields one mapping per combination of ``a`` and ``b`` values."""
    qube = Qube.from_dict({"a=1/2": {"b=1/2": {}}})

    expected_entries = [
        {"a": a_value, "b": b_value}
        for a_value in ("1", "2")
        for b_value in ("1", "2")
    ]

    # Mappings are frozen so they can be collected into sets, which makes the
    # comparison independent of iteration order.
    actual = {frozendict(leaf) for leaf in qube.leaves()}
    expected = {frozendict(entry) for entry in expected_entries}

    assert actual == expected

View File

@ -2,4 +2,4 @@ from qubed.rust import hello
def test_hello():
    """``hello`` from ``qubed.rust`` greets the given name."""
    greeting = hello("World")
    assert greeting == "Hello, World!"

View File

@ -1,19 +1,22 @@
from qubed import Qube from qubed import Qube
def test_leaf_conservation():
    """Union with a qube holding one new leaf raises the leaf count by that qube's count."""
    base = Qube.from_dict(
        {
            "class=d1": {
                "dataset=climate-dt": {
                    "time=0000": {
                        "param=130/134/137/146/147/151/165/166/167/168/169": {}
                    },
                    "time=0001": {"param=130": {}},
                }
            }
        }
    )

    # time=0001 / param=134 is not present in `base`.
    extra = Qube.from_datacube(
        {
            "class": "d1",
            "dataset": "climate-dt",
            "time": "0001",
            "param": "134",
        }
    )

    merged = base | extra
    assert base.n_leaves + extra.n_leaves == merged.n_leaves

View File

@ -2,28 +2,32 @@ from qubed import Qube
def test_smoke():
    """``compress()`` on the uncompressed tree gives the expected compact qube."""
    uncompressed = {
        "class=od": {
            "expver=0001": {"param=1": {}, "param=2": {}},
            "expver=0002": {"param=1": {}, "param=2": {}},
        },
        "class=rd": {
            "expver=0001": {"param=1": {}, "param=2": {}, "param=3": {}},
            "expver=0002": {"param=1": {}, "param=2": {}},
        },
    }

    # root
    # ├── class=od, expver=0001/0002, param=1/2
    # └── class=rd
    #     ├── expver=0001, param=1/2/3
    #     └── expver=0002, param=1/2
    compact = {
        "class=od": {"expver=0001/0002": {"param=1/2": {}}},
        "class=rd": {
            "expver=0001": {"param=1/2/3": {}},
            "expver=0002": {"param=1/2": {}},
        },
    }

    q = Qube.from_dict(uncompressed)
    ct = Qube.from_dict(compact)

    assert q.compress() == ct

View File

@ -1 +1 @@
API_HOST=localhost:8124 API_HOST=localhost:8124

View File

@ -15,16 +15,19 @@ CORS(app, resources={r"/api/*": {"origins": "*"}})
# So flask speaks http while the client speaks https # So flask speaks http while the client speaks https
# Client <-- https ---> Proxy <---- http ---> Flask server # Client <-- https ---> Proxy <---- http ---> Flask server
# For the Oauth flow, flask needs to provide a callback url and it needs to use the right scheme=https # For the Oauth flow, flask needs to provide a callback url and it needs to use the right scheme=https
# This line tells flask to look at HTTP headers set by the TLS proxy to figure out what the original # This line tells flask to look at HTTP headers set by the TLS proxy to figure out what the original
# Traffic looked like. # Traffic looked like.
# See https://flask.palletsprojects.com/en/3.0.x/deploying/proxy_fix/ # See https://flask.palletsprojects.com/en/3.0.x/deploying/proxy_fix/
app.wsgi_app = ProxyFix( app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1)
app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1
)
config = {} config = {}
@app.route("/")
def index():
    """Render ``index.html`` with the current request, the config and the API URL.

    The API URL is read from the ``API_URL`` environment variable and falls
    back to ``/api/stac`` when it is not set.
    """
    api_url = os.environ.get("API_URL", "/api/stac")
    template_context = {
        "request": request,
        "config": config,
        "api_url": api_url,
    }
    return render_template("index.html", **template_context)

View File

@ -5,4 +5,4 @@ python-dotenv
flask-login flask-login
flask-cors flask-cors
cachetools cachetools
uvicorn uvicorn

View File

@ -1,2 +1,2 @@
export API_URL="http://127.0.0.1:8124/api/stac" export API_URL="http://127.0.0.1:8124/api/stac"
flask run --debug --port=5006 flask run --debug --port=5006

View File

@ -208,4 +208,4 @@ span.value:hover {
#details { #details {
width: 100%; width: 100%;
} }
} }

View File

@ -24,7 +24,7 @@
<a id="stac-anchor"><button id="stac-btn">Raw STAC</button></a> <a id="stac-anchor"><button id="stac-btn">Raw STAC</button></a>
<button id="next-btn">Next</button> <button id="next-btn">Next</button>
</div> </div>
<div id="items"> <div id="items">
<!-- Items from the STAC catalog will be rendered here --> <!-- Items from the STAC catalog will be rendered here -->
</div> </div>
@ -58,4 +58,4 @@
</script> </script>
<script src="/static/app.js"></script> <script src="/static/app.js"></script>
</body> </body>
</html> </html>