Add pre-commit hooks and run them
This commit is contained in:
parent
162dd48748
commit
68ad80e435
18
.github/workflows/update_version.py
vendored
18
.github/workflows/update_version.py
vendored
@ -4,24 +4,36 @@ from pathlib import Path
|
||||
|
||||
CARGO_TOML_PATH = Path("Cargo.toml")
|
||||
|
||||
|
||||
# Get the latest Git tag and strip the leading 'v' if present
|
||||
def get_git_version():
    """Return the output of ``git describe --tags --always`` without a leading ``v``.

    Because ``--always`` is passed, git falls back to an abbreviated commit
    hash when no tag is reachable, so the returned string is not guaranteed
    to be a tag name.

    Raises:
        RuntimeError: if the ``git`` command exits with a non-zero status.
            The original ``CalledProcessError`` is attached as the cause.
    """
    try:
        described = subprocess.check_output(
            ["git", "describe", "--tags", "--always"], text=True
        ).strip()
    except subprocess.CalledProcessError as err:
        # Chain explicitly so the git exit status/output stays visible in tracebacks.
        raise RuntimeError(
            "Failed to get Git tag. Make sure you have at least one tag in the repository."
        ) from err

    # Remove leading 'v'
    return re.sub(r"^v", "", described)
|
||||
|
||||
|
||||
# Update version in Cargo.toml
|
||||
def update_cargo_version(new_version):
    """Rewrite the ``version = "..."`` entry of Cargo.toml to ``new_version``.

    Only the ``[package]`` table is touched when one is present, so
    ``version`` keys that start a line in other tables (for example
    ``[dependencies.foo]``) are left alone. If no ``[package]`` header is
    found, every line-start ``version = "..."`` entry is rewritten, as before.

    Args:
        new_version: version string to write; it is inserted literally.
    """
    cargo_toml = CARGO_TOML_PATH.read_text()
    version_line = re.compile(r'^version = "[^"]+"', flags=re.MULTILINE)

    def replacement(_match):
        # A callable replacement keeps backslashes / group references in
        # new_version from being interpreted as a regex template.
        return f'version = "{new_version}"'

    header = re.search(r"^\[package\]\s*$", cargo_toml, flags=re.MULTILINE)
    if header is None:
        updated_toml = version_line.sub(replacement, cargo_toml)
    else:
        # The [package] table runs until the next line that opens a table.
        next_table = re.compile(r"^\[", flags=re.MULTILINE).search(
            cargo_toml, header.end()
        )
        section_end = next_table.start() if next_table else len(cargo_toml)
        updated_toml = (
            cargo_toml[: header.end()]
            + version_line.sub(replacement, cargo_toml[header.end() : section_end])
            + cargo_toml[section_end:]
        )

    CARGO_TOML_PATH.write_text(updated_toml)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
version = get_git_version()
|
||||
print(f"Parsed version: {version}")
|
||||
|
16
.pre-commit-config.yaml
Normal file
16
.pre-commit-config.yaml
Normal file
@ -0,0 +1,16 @@
|
||||
# See https://pre-commit.com for more information
|
||||
# See https://pre-commit.com/hooks.html for more hooks
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
# - id: check-yaml
|
||||
- id: check-added-large-files
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.9.7
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [ --fix ]
|
||||
- id: ruff-format
|
@ -32,7 +32,3 @@ spec:
|
||||
- {{ .Values.ingress.hostname }}
|
||||
secretName: {{ .Values.ingress.tlsSecretName }}
|
||||
{{- end }}
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -43,4 +43,3 @@ services:
|
||||
# volumes:
|
||||
# - ./web_query_builder:/code/web_query_builder
|
||||
# restart: always
|
||||
|
||||
|
@ -11,4 +11,3 @@ A = Qube.from_dict({
|
||||
})
|
||||
A
|
||||
```
|
||||
|
||||
|
12
docs/conf.py
12
docs/conf.py
@ -6,10 +6,10 @@
|
||||
# -- Project information -----------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
||||
|
||||
project = 'qubed'
|
||||
copyright = '2025, Tom Hodson (ECMWF)'
|
||||
author = 'Tom Hodson (ECMWF)'
|
||||
release = '0.1.0'
|
||||
project = "qubed"
|
||||
copyright = "2025, Tom Hodson (ECMWF)"
|
||||
author = "Tom Hodson (ECMWF)"
|
||||
release = "0.1.0"
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||
@ -20,8 +20,8 @@ extensions = [
|
||||
"myst_nb", # For parsing markdown
|
||||
]
|
||||
|
||||
templates_path = ['_templates']
|
||||
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', "jupyter_execute"]
|
||||
templates_path = ["_templates"]
|
||||
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "jupyter_execute"]
|
||||
|
||||
|
||||
source_suffix = {
|
||||
|
@ -54,14 +54,3 @@ Distinct datasets: {climate_dt.n_leaves},
|
||||
Number of nodes in the tree: {climate_dt.n_nodes}
|
||||
""")
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1,42 +1,48 @@
|
||||
import json
|
||||
from collections import defaultdict
|
||||
|
||||
from qubed import Qube
|
||||
|
||||
# Load the raw anemoi metadata; the context manager closes the file handle
# instead of leaving it to the garbage collector.
with open("raw_anemoi_metadata.json") as metadata_file:
    metadata = json.load(metadata_file)

# Indices of the variables the model outputs (prognostic then diagnostic).
predicted_indices = [
    *metadata["data_indices"]["data"]["output"]["prognostic"],
    *metadata["data_indices"]["data"]["output"]["diagnostic"],
]
variables = metadata["dataset"]["variables"]
variables = [variables[i] for i in predicted_indices]

# print('Raw Model Variables:', variables)

# Split variables between pressure and surface
surface_variables = [v for v in variables if "_" not in v]

# Collect the levels for each pressure variable
level_variables = defaultdict(list)
for v in variables:
    if "_" in v:
        # NOTE(review): assumes exactly one underscore per name ("<param>_<level>");
        # a name with more underscores would fail to unpack here.
        variable, level = v.split("_")
        level_variables[variable].append(int(level))

# print(level_variables)

# Use qubed library to construct tree
model_tree = Qube.empty()

for variable, levels in level_variables.items():
    model_tree = model_tree | Qube.from_datacube(
        {
            "levtype": "pl",
            "param": variable,
            "level": levels,
        }
    )

for variable in surface_variables:
    model_tree = model_tree | Qube.from_datacube(
        {
            "levtype": "sfc",
            "param": variable,
        }
    )

print(model_tree.to_json())
|
@ -6,16 +6,19 @@ from typing import Any, Callable, Iterable, Literal
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class HTML:
    """Immutable holder for an HTML string.

    ``_repr_html_`` hands the stored markup back unchanged (presumably so
    notebook front-ends that look for that hook can render it).
    """

    html: str

    def _repr_html_(self):
        markup = self.html
        return markup
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Values(ABC):
|
||||
@abstractmethod
|
||||
def summary(self) -> str:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def __len__(self) -> int:
|
||||
pass
|
||||
@ -25,30 +28,37 @@ class Values(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def from_strings(self, values: list[str]) -> list['Values']:
|
||||
def from_strings(self, values: list[str]) -> list["Values"]:
|
||||
pass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Enum(Values):
    """
    The simplest kind of key value is just a list of strings.
    summary -> string1/string2/string....
    """

    values: list[Any]

    def __len__(self) -> int:
        """Return the number of stored values."""
        return len(self.values)

    def summary(self) -> str:
        """Return the sorted values joined with ``/``."""
        # values is typed list[Any]; stringify after sorting so that non-str
        # entries (e.g. ints) do not make str.join raise TypeError.
        return "/".join(str(v) for v in sorted(self.values))

    def __contains__(self, value: Any) -> bool:
        """Return True if ``value`` is one of the stored values."""
        return value in self.values

    def from_strings(self, values: list[str]) -> list["Values"]:
        """Wrap ``values`` in a single-element list containing one new Enum."""
        return [Enum(values)]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Range(Values, ABC):
    """Abstract base for range-like values, tagged by a keyword-only ``dtype``."""

    # Keyword-only so subclasses can declare their own positional fields.
    dtype: str = dataclasses.field(kw_only=True)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DateRange(Range):
|
||||
start: date
|
||||
@ -57,54 +67,67 @@ class DateRange(Range):
|
||||
dtype: Literal["date"] = dataclasses.field(kw_only=True, default="date")
|
||||
|
||||
@classmethod
def from_strings(cls, values: list[str]) -> list["DateRange"]:
    """Group ``YYYYMMDD`` strings into ranges of consecutive days.

    Each maximal run of consecutive dates becomes one ``DateRange`` with a
    one-day step; an isolated date becomes a range with ``start == end``
    and a zero step. Every input date ends up in exactly one range, and
    duplicates are collapsed.

    Args:
        values: dates formatted as ``%Y%m%d``.

    Returns:
        The ranges in ascending order; an empty list for empty input.
    """
    if not values:
        return []

    # Store date objects (not datetimes) to match the declared field type
    # and the comparison done in __contains__.
    dates = sorted({datetime.strptime(v, "%Y%m%d").date() for v in values})

    # Split the sorted dates into runs where neighbours are one day apart.
    runs = [[dates[0]]]
    for day in dates[1:]:
        if day - runs[-1][-1] == timedelta(days=1):
            runs[-1].append(day)
        else:
            runs.append([day])

    return [
        DateRange(
            start=run[0],
            end=run[-1],
            step=timedelta(days=1 if len(run) > 1 else 0),
        )
        for run in runs
    ]
|
||||
|
||||
def __contains__(self, value: Any) -> bool:
    """Return True if the ``YYYYMMDD`` string ``value`` lies on this range.

    The date must fall within ``[start, end]`` and be a whole number of
    steps away from ``start``. A zero step denotes a single date.
    """
    v = datetime.strptime(value, "%Y%m%d").date()

    def as_date(d):
        # Tolerate datetime endpoints; date and datetime cannot be compared.
        return d.date() if isinstance(d, datetime) else d

    start, end = as_date(self.start), as_date(self.end)
    if not start <= v <= end:
        return False
    if not self.step:
        # Zero step: avoid dividing by zero; only the start date is a member.
        return v == start
    # timedelta % timedelta is a timedelta, so compare against timedelta(0), not 0.
    return (v - start) % self.step == timedelta(0)
|
||||
|
||||
|
||||
def __len__(self) -> int:
    """Return the number of dates on the range, counting both endpoints."""
    if not self.step:
        # A zero step marks a single-date range; dividing by it would raise.
        return 1
    # __contains__ treats both start and end as members, hence the + 1.
    return (self.end - self.start) // self.step + 1
|
||||
|
||||
def summary(self) -> str:
    """Render the range as ``start``, ``start/to/end`` or ``start/to/end/by/days``.

    A zero step prints only the start date, a one-day step omits the
    ``/by/`` part, and any other step is shown as a whole number of days.
    """
    first = self.start.strftime("%Y%m%d")
    if self.step == timedelta(days=0):
        return f"{first}"

    last = self.end.strftime("%Y%m%d")
    if self.step == timedelta(days=1):
        return f"{first}/to/{last}"

    days_per_step = self.step // timedelta(days=1)
    return f"{first}/to/{last}/by/{days_per_step}"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TimeRange(Range):
|
||||
@ -114,45 +137,49 @@ class TimeRange(Range):
|
||||
dtype: Literal["time"] = dataclasses.field(kw_only=True, default="time")
|
||||
|
||||
@classmethod
def from_strings(cls, values: list[str]) -> list["TimeRange"]:
    """Group integer-valued time strings into ranges of consecutive values.

    Values are parsed with ``int`` and sorted. Neighbours that differ by
    exactly 1 are merged into one range with ``step=1``; an isolated value
    becomes a range with ``start == end`` and ``step=0``. Every input value
    ends up in exactly one range.

    Returns:
        The ranges in ascending order; an empty list for empty input.
    """
    if len(values) == 0:
        return []

    times = sorted(int(v) for v in values)
    if len(times) < 2:
        # NOTE(review): a lone input value gets step=100 while isolated values
        # among several inputs get step=0 below; kept as in the original,
        # confirm which is intended.
        return [TimeRange(start=times[0], end=times[0], step=100)]

    # Split the sorted values into runs whose neighbours differ by 1.
    runs = [[times[0]]]
    for t in times[1:]:
        if t - runs[-1][-1] == 1:
            runs[-1].append(t)
        else:
            runs.append([t])

    return [
        TimeRange(start=run[0], end=run[-1], step=1 if len(run) > 1 else 0)
        for run in runs
    ]
|
||||
|
||||
def __len__(self) -> int:
    """Return the number of values on the range, counting both endpoints."""
    if self.step == 0:
        # from_strings uses step=0 for single-value ranges; avoid dividing by zero.
        return 1
    # Both start and end are members of the range, hence the + 1.
    return (self.end - self.start) // self.step + 1
|
||||
|
||||
def summary(self) -> str:
    """Render the range as ``start`` (zero step) or ``start/to/end/by/step``.

    ``start`` and ``end`` are zero-padded to four digits.
    """
    start_text = f"{self.start:04d}"
    if self.step == 0:
        return f"{start_text}"

    end_text = f"{self.end:04d}"
    return f"{start_text}/to/{end_text}/by/{self.step}"
|
||||
@ -161,6 +188,7 @@ class TimeRange(Range):
|
||||
v = int(value)
|
||||
return self.start <= v <= self.end and (v - self.start) % self.step == 0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class IntRange(Range):
|
||||
dtype: Literal["int"]
|
||||
@ -173,7 +201,9 @@ class IntRange(Range):
|
||||
return (self.end - self.start) // self.step
|
||||
|
||||
def summary(self) -> str:
    """Render the range as ``start/to/end/by/step``."""

    def fmt(d):
        # NOTE(review): the endpoints of an "int" range are presumably plain
        # ints, which have no strftime; fall back to str() for those and keep
        # the date formatting for anything that does support it.
        return d.strftime("%Y%m%d") if hasattr(d, "strftime") else str(d)

    return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
|
||||
|
||||
def __contains__(self, value: Any) -> bool:
|
||||
@ -186,18 +216,24 @@ def values_from_json(obj) -> Values:
|
||||
return Enum(obj)
|
||||
|
||||
match obj["dtype"]:
|
||||
case "date": return DateRange(**obj)
|
||||
case "time": return TimeRange(**obj)
|
||||
case "int": return IntRange(**obj)
|
||||
case _: raise ValueError(f"Unknown dtype {obj['dtype']}")
|
||||
case "date":
|
||||
return DateRange(**obj)
|
||||
case "time":
|
||||
return TimeRange(**obj)
|
||||
case "int":
|
||||
return IntRange(**obj)
|
||||
case _:
|
||||
raise ValueError(f"Unknown dtype {obj['dtype']}")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Node:
    """One node of the compressed tree: a key, its values and child nodes."""

    key: str
    # Must support len()
    values: Values
    # Applies to all children
    metadata: dict[str, str]
    # List of size product(len(n.values) for n in ancestors(self))
    payload: list[Any]
    children: list["Node"]
|
||||
|
||||
|
||||
def summarize_node(node: Node) -> tuple[str, Node]:
|
||||
"""
|
||||
@ -219,7 +255,8 @@ def summarize_node(node: Node) -> tuple[str, Node]:
|
||||
|
||||
return ", ".join(summary), node
|
||||
|
||||
def node_tree_to_string(node : Node, prefix : str = "", depth = None) -> Iterable[str]:
|
||||
|
||||
def node_tree_to_string(node: Node, prefix: str = "", depth=None) -> Iterable[str]:
|
||||
summary, node = summarize_node(node)
|
||||
|
||||
if depth is not None and depth <= 0:
|
||||
@ -228,7 +265,7 @@ def node_tree_to_string(node : Node, prefix : str = "", depth = None) -> Iterabl
|
||||
# Special case for nodes with only a single child, this makes the printed representation more compact
|
||||
elif len(node.children) == 1:
|
||||
yield summary + ", "
|
||||
yield from node_tree_to_string(node.children[0], prefix, depth = depth)
|
||||
yield from node_tree_to_string(node.children[0], prefix, depth=depth)
|
||||
return
|
||||
else:
|
||||
yield summary + "\n"
|
||||
@ -237,9 +274,14 @@ def node_tree_to_string(node : Node, prefix : str = "", depth = None) -> Iterabl
|
||||
connector = "└── " if index == len(node.children) - 1 else "├── "
|
||||
yield prefix + connector
|
||||
extension = " " if index == len(node.children) - 1 else "│ "
|
||||
yield from node_tree_to_string(child, prefix + extension, depth = depth - 1 if depth is not None else None)
|
||||
yield from node_tree_to_string(
|
||||
child, prefix + extension, depth=depth - 1 if depth is not None else None
|
||||
)
|
||||
|
||||
def node_tree_to_html(node : Node, prefix : str = "", depth = 1, connector = "") -> Iterable[str]:
|
||||
|
||||
def node_tree_to_html(
|
||||
node: Node, prefix: str = "", depth=1, connector=""
|
||||
) -> Iterable[str]:
|
||||
summary, node = summarize_node(node)
|
||||
|
||||
if len(node.children) == 0:
|
||||
@ -252,32 +294,36 @@ def node_tree_to_html(node : Node, prefix : str = "", depth = 1, connector = "")
|
||||
for index, child in enumerate(node.children):
|
||||
connector = "└── " if index == len(node.children) - 1 else "├── "
|
||||
extension = " " if index == len(node.children) - 1 else "│ "
|
||||
yield from node_tree_to_html(child, prefix + extension, depth = depth - 1, connector = prefix+connector)
|
||||
yield from node_tree_to_html(
|
||||
child, prefix + extension, depth=depth - 1, connector=prefix + connector
|
||||
)
|
||||
yield "</details>"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class CompressedTree:
|
||||
root: Node
|
||||
|
||||
@classmethod
def from_json(cls, json: dict) -> "CompressedTree":
    """Build a tree from nested dicts with ``key``/``values``/``children`` entries.

    ``metadata`` and ``payload`` are optional per node and default to an
    empty dict and an empty list respectively.
    """

    def build_node(obj: dict) -> Node:
        node_key = obj["key"]
        node_values = values_from_json(obj["values"])
        node_metadata = obj.get("metadata", {})
        node_payload = obj.get("payload", [])
        node_children = [build_node(child) for child in obj["children"]]
        return Node(
            key=node_key,
            values=node_values,
            metadata=node_metadata,
            payload=node_payload,
            children=node_children,
        )

    return CompressedTree(root=build_node(json))
|
||||
|
||||
def __str__(self):
    """Return the text rendering of the whole tree, starting at the root."""
    pieces = node_tree_to_string(node=self.root)
    return "".join(pieces)
|
||||
|
||||
def html(self, depth=2) -> HTML:
    """Return the HTML rendering produced by ``_repr_html_`` wrapped in ``HTML``."""
    markup = self._repr_html_(depth=depth)
    return HTML(markup)
|
||||
|
||||
def _repr_html_(self, depth = 2):
|
||||
def _repr_html_(self, depth=2):
|
||||
css = """
|
||||
<style>
|
||||
.qubed-tree-view {
|
||||
@ -316,67 +362,100 @@ class CompressedTree:
|
||||
</style>
|
||||
|
||||
"""
|
||||
nodes = "".join(cc for c in self.root.children for cc in node_tree_to_html(node=c, depth=depth))
|
||||
nodes = "".join(
|
||||
cc
|
||||
for c in self.root.children
|
||||
for cc in node_tree_to_html(node=c, depth=depth)
|
||||
)
|
||||
return f"{css}<pre class='qubed-tree-view'>{nodes}</pre>"
|
||||
|
||||
def print(self, depth=None):
    """Print the text rendering of every child of the root, limited to ``depth``."""
    fragments = []
    for child in self.root.children:
        fragments.extend(node_tree_to_string(node=child, depth=depth))
    print("".join(fragments))
|
||||
|
||||
def transform(self, func: Callable[[Node], Node]) -> "CompressedTree":
    "Call a function on every node of the tree, any changes to the children of a node will be ignored."

    def rebuild(original: Node) -> Node:
        # Apply func first, then overwrite its children with the recursively
        # transformed children of the *original* node.
        mapped = func(original)
        mapped_children = []
        for child in original.children:
            mapped_children.append(rebuild(child))
        return dataclasses.replace(mapped, children=mapped_children)

    new_root = rebuild(self.root)
    return CompressedTree(root=new_root)
|
||||
|
||||
def guess_datatypes(self) -> "CompressedTree":
    """Return a copy of the tree with ``time``/``date`` enums turned into ranges.

    A node whose values are an ``Enum`` and whose key is ``"time"`` or
    ``"date"`` is replaced by one node per range returned by the matching
    class's ``from_strings``; all other nodes are copied with converted
    children.
    """
    range_classes = {"time": TimeRange, "date": DateRange}

    def convert(node: Node) -> list[Node]:
        # Try to convert enum values into more structured types
        converted_children = []
        for child in node.children:
            converted_children.extend(convert(child))

        range_class = None
        if isinstance(node.values, Enum):
            range_class = range_classes.get(node.key)

        if range_class is None:
            return [dataclasses.replace(node, children=converted_children)]

        return [
            dataclasses.replace(node, values=value_range, children=converted_children)
            for value_range in range_class.from_strings(node.values.values)
        ]

    root_children = []
    for child in self.root.children:
        root_children.extend(convert(child))
    return CompressedTree(root=dataclasses.replace(self.root, children=root_children))
|
||||
|
||||
|
||||
def select(
    self,
    selection: dict[str, str | list[str]],
    mode: Literal["strict", "relaxed"] = "relaxed",
) -> "CompressedTree":
    """Return the subtree whose nodes match ``selection``.

    For a node whose key appears in ``selection``, only the requested values
    that the node contains are kept; the node is dropped when none match.
    A node whose key is not in ``selection`` is dropped in ``"strict"`` mode
    and kept (with filtered children) in ``"relaxed"`` mode.
    """
    # make all values lists
    wanted = {
        key: requested if isinstance(requested, list) else [requested]
        for key, requested in selection.items()
    }

    def filtered_children(node: Node) -> list:
        kept = []
        for child in node.children:
            picked = pick(child)
            if picked is not None:
                kept.append(picked)
        return kept

    def pick(node: Node) -> Node | None:
        # Check if the key is specified in the selection
        if node.key not in wanted:
            if mode == "strict":
                return None
            return dataclasses.replace(node, children=filtered_children(node))

        # If the key is specified, check if any of the values match
        matching = Enum([c for c in wanted[node.key] if c in node.values])
        if not matching:
            return None

        return dataclasses.replace(
            node, values=matching, children=filtered_children(node)
        )

    new_root = dataclasses.replace(self.root, children=filtered_children(self.root))
    return CompressedTree(root=new_root)
|
||||
|
||||
def to_list_of_cubes(self):
    """Return every root-to-leaf path of the tree as a list of nodes.

    Each element of the result is one path, starting with the root node and
    ending with a node that has no children.
    """

    def to_list_of_cubes(node: Node) -> list[list[Node]]:
        if not node.children:
            # Base case: a leaf is a path of its own. Without it the
            # comprehension below yields nothing and the result is always empty.
            return [[node]]
        return [
            [node] + sub_cube
            for c in node.children
            for sub_cube in to_list_of_cubes(c)
        ]

    return to_list_of_cubes(self.root)
|
||||
|
||||
@ -385,8 +464,6 @@ class CompressedTree:
|
||||
print(f"Number of distinct paths: {len(cubes)}")
|
||||
|
||||
|
||||
|
||||
|
||||
# What should the interface look like?
|
||||
|
||||
# tree = CompressedTree.from_json(...)
|
||||
|
@ -15,5 +15,5 @@ with open("config/climate-dt/language.yaml") as f:
|
||||
mars_language = yaml.safe_load(f)["_field"]
|
||||
|
||||
print("Storing data in redis")
|
||||
r.set('compressed_catalog', json.dumps(compressed_catalog))
|
||||
r.set('mars_language', json.dumps(mars_language))
|
||||
r.set("compressed_catalog", json.dumps(compressed_catalog))
|
||||
r.set("mars_language", json.dumps(mars_language))
|
||||
|
@ -19,7 +19,7 @@ from .value_types import QEnum, Values, values_from_json
|
||||
@dataclass(frozen=False, eq=True, order=True, unsafe_hash=True)
|
||||
class Qube:
|
||||
data: NodeData
|
||||
children: tuple['Qube', ...]
|
||||
children: tuple["Qube", ...]
|
||||
|
||||
@property
|
||||
def key(self) -> str:
|
||||
@ -33,36 +33,36 @@ class Qube:
|
||||
def metadata(self) -> frozendict[str, Any]:
|
||||
return self.data.metadata
|
||||
|
||||
def replace(self, **kwargs) -> "Qube":
    """Return a copy with the given ``key``/``values``/``metadata``/``children`` replaced.

    ``key``, ``values`` and ``metadata`` are applied to ``self.data``;
    ``children`` is applied to the node itself. Other keyword arguments are
    ignored, and ``self`` is returned when nothing applicable was passed.
    """
    data_fields = ("key", "values", "metadata")
    data_changes = {name: kwargs[name] for name in kwargs if name in data_fields}

    node_changes = {}
    if "children" in kwargs:
        node_changes["children"] = kwargs["children"]
    if data_changes:
        node_changes["data"] = dataclasses.replace(self.data, **data_changes)

    if not node_changes:
        return self
    return dataclasses.replace(self, **node_changes)
|
||||
|
||||
def summary(self) -> str:
    """Return the summary string of this node's data."""
    node_data = self.data
    return node_data.summary()
|
||||
|
||||
@classmethod
def make(cls, key: str, values: Values, children, **kwargs) -> "Qube":
    """Create a node with ``children`` stored as a tuple sorted by ``(key, values.min())``.

    ``metadata`` may be passed as a keyword argument; it defaults to an
    empty ``frozendict``.
    """
    node_data = NodeData(key, values, metadata=kwargs.get("metadata", frozendict()))

    def sort_key(child):
        return (child.key, child.values.min())

    ordered_children = sorted(children, key=sort_key)
    return cls(data=node_data, children=tuple(ordered_children))
|
||||
|
||||
@classmethod
def root_node(cls, children: Iterable["Qube"]) -> "Qube":
    """Create a node with key ``"root"`` and value ``"root"`` above ``children``."""
    root_values = QEnum(("root",))
    return cls.make("root", root_values, children)
|
||||
|
||||
@classmethod
|
||||
def from_datacube(cls, datacube: dict[str, str | Sequence[str]]) -> 'Qube':
|
||||
def from_datacube(cls, datacube: dict[str, str | Sequence[str]]) -> "Qube":
|
||||
key_vals = list(datacube.items())[::-1]
|
||||
|
||||
children: list["Qube"] = []
|
||||
@ -73,9 +73,8 @@ class Qube:
|
||||
|
||||
return cls.root_node(children)
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_json(cls, json: dict) -> 'Qube':
|
||||
def from_json(cls, json: dict) -> "Qube":
|
||||
def from_json(json: dict) -> Qube:
|
||||
return Qube.make(
|
||||
key=json["key"],
|
||||
@ -83,6 +82,7 @@ class Qube:
|
||||
metadata=frozendict(json["metadata"]) if "metadata" in json else {},
|
||||
children=(from_json(c) for c in json["children"]),
|
||||
)
|
||||
|
||||
return from_json(json)
|
||||
|
||||
def to_json(self) -> dict:
|
||||
@ -91,40 +91,56 @@ class Qube:
|
||||
"key": node.key,
|
||||
"values": node.values.to_json(),
|
||||
"metadata": dict(node.metadata),
|
||||
"children": [to_json(c) for c in node.children]
|
||||
"children": [to_json(c) for c in node.children],
|
||||
}
|
||||
|
||||
return to_json(self)
|
||||
|
||||
@classmethod
def from_dict(cls, d: dict) -> "Qube":
    """Build a Qube from nested dicts keyed by ``"key=value1/value2"`` strings.

    Each dict key is split on ``=`` into the node key and a ``/``-separated
    list of values; the dict value holds that node's children in the same
    format.
    """

    def build(mapping: dict) -> list[Qube]:
        nodes = []
        for spec, sub_mapping in mapping.items():
            pieces = spec.split("=")
            nodes.append(
                Qube.make(
                    key=pieces[0],
                    values=QEnum(pieces[1].split("/")),
                    children=build(sub_mapping),
                )
            )
        return nodes

    return Qube.root_node(build(d))
|
||||
|
||||
@classmethod
def empty(cls) -> "Qube":
    """Return a root node with no children."""
    no_children = []
    return Qube.root_node(no_children)
|
||||
|
||||
def __str__(self, depth=None, name=None) -> str:
    """Return the text rendering of this Qube.

    When ``name`` is given, this node is shown under that name (its data is
    swapped for a ``RootNodeData`` with ``key=name``); ``depth`` is passed
    to ``node_tree_to_string``.
    """
    node = self
    if name is not None:
        renamed = RootNodeData(key=name, values=self.values, metadata=self.metadata)
        node = dataclasses.replace(self, data=renamed)
    return "".join(node_tree_to_string(node=node, depth=depth))
|
||||
def print(self, depth=None, name: str | None = None):
    """Print the text rendering returned by ``__str__`` for ``depth`` and ``name``."""
    rendered = self.__str__(depth=depth, name=name)
    print(rendered)
|
||||
|
||||
def html(self, depth=2, collapse=True, name: str | None = None) -> HTML:
    """Return the HTML rendering of this Qube wrapped in ``HTML``.

    When ``name`` is given, this node is shown under that name (its data is
    swapped for a ``RootNodeData`` with ``key=name``); ``depth`` and
    ``collapse`` are passed to ``node_tree_to_html``.
    """
    node = self
    if name is not None:
        renamed = RootNodeData(key=name, values=self.values, metadata=self.metadata)
        node = dataclasses.replace(self, data=renamed)
    return HTML(node_tree_to_html(node=node, depth=depth, collapse=collapse))
|
||||
|
||||
def _repr_html_(self) -> str:
    """Return ``node_tree_to_html`` for this node with ``depth=2`` and ``collapse=True``."""
    rendered = node_tree_to_html(self, depth=2, collapse=True)
    return rendered
|
||||
|
||||
# Allow "key=value/value" / qube to prepend keys
|
||||
def __rtruediv__(self, other: str) -> "Qube":
|
||||
@ -133,25 +149,33 @@ class Qube:
|
||||
return Qube.root_node([Qube.make(key, values, self.children)])
|
||||
|
||||
def __or__(self, other: "Qube") -> "Qube":
    """Return ``self | other`` via ``set_operations.operation`` with ``UNION``."""
    union = set_operations.SetOperation.UNION
    return set_operations.operation(self, other, union, type(self))
|
||||
|
||||
def __and__(self, other: "Qube") -> "Qube":
    """Return ``self & other`` via ``set_operations.operation`` with ``INTERSECTION``."""
    intersection = set_operations.SetOperation.INTERSECTION
    return set_operations.operation(self, other, intersection, type(self))
|
||||
|
||||
def __sub__(self, other: "Qube") -> "Qube":
    """Return ``self - other`` via ``set_operations.operation`` with ``DIFFERENCE``."""
    difference = set_operations.SetOperation.DIFFERENCE
    return set_operations.operation(self, other, difference, type(self))
|
||||
|
||||
def __xor__(self, other: "Qube") -> "Qube":
    """Return ``self ^ other`` via ``set_operations.operation`` with ``SYMMETRIC_DIFFERENCE``."""
    symmetric_difference = set_operations.SetOperation.SYMMETRIC_DIFFERENCE
    return set_operations.operation(self, other, symmetric_difference, type(self))
|
||||
|
||||
def leaves(self) -> Iterable[dict[str, str]]:
    """Yield one ``{key: value, ...}`` dict per leaf below this node.

    Each dict maps the keys on the path to the single value chosen for each;
    a node whose key is ``"root"`` contributes nothing to its descendants'
    dicts.
    """
    is_root = self.key == "root"
    for value in self.values:
        if not self.children:
            yield {self.key: value}
        for child in self.children:
            for leaf in child.leaves():
                yield leaf if is_root else {self.key: value, **leaf}
|
||||
|
||||
@ -165,10 +189,9 @@ class Qube:
|
||||
for sub_cube in to_list_of_cubes(c):
|
||||
yield dataclasses.replace(node, children=[sub_cube])
|
||||
|
||||
|
||||
return Qube.root_node((q for c in self.children for q in to_list_of_cubes(c)))
|
||||
|
||||
def __getitem__(self, args) -> 'Qube':
|
||||
def __getitem__(self, args) -> "Qube":
|
||||
if isinstance(args, str):
|
||||
specifiers = args.split(",")
|
||||
current = self
|
||||
@ -180,7 +203,9 @@ class Qube:
|
||||
current = c
|
||||
break
|
||||
else:
|
||||
raise KeyError(f"Key '{key}' not found in children of '{current.key}'")
|
||||
raise KeyError(
|
||||
f"Key '{key}' not found in children of '{current.key}'"
|
||||
)
|
||||
return Qube.root_node(current.children)
|
||||
|
||||
elif isinstance(args, tuple) and len(args) == 2:
|
||||
@ -195,38 +220,47 @@ class Qube:
|
||||
@cached_property
def n_leaves(self) -> int:
    """Number of leaves below this node: ``len(values)`` times the children's total."""
    if not self.children:
        # An empty root counts as zero leaves; this makes the equation
        # q.n_leaves + r.n_leaves == (q | r).n_leaves true if q and r have no overlap.
        return 0 if self.key == "root" else len(self.values)

    leaves_below = sum(child.n_leaves for child in self.children)
    return len(self.values) * leaves_below
|
||||
|
||||
@cached_property
def n_nodes(self) -> int:
    """Number of nodes in this subtree, including this node; 0 for an empty root."""
    if not self.children:
        return 0 if self.key == "root" else 1

    nodes_below = sum(child.n_nodes for child in self.children)
    return 1 + nodes_below
|
||||
|
||||
def transform(self, func: "Callable[[Qube], Qube | Iterable[Qube]]") -> "Qube":
    """
    Call a function on every node of the Qube, return one or more nodes.
    If multiple nodes are returned they each get a copy of the (transformed) children of the original node.
    Any changes to the children of a node will be ignored.
    """

    def rebuild(node: Qube) -> list[Qube]:
        # Children are transformed before func sees the node itself.
        rebuilt_children = []
        for child in node.children:
            rebuilt_children.extend(rebuild(child))

        produced = func(node)
        if isinstance(produced, Qube):
            produced = [produced]

        results = []
        for new_node in produced:
            results.append(new_node.replace(children=rebuilt_children))
        return results

    top_level = []
    for child in self.children:
        top_level.extend(rebuild(child))
    return dataclasses.replace(self, children=tuple(top_level))
|
||||
|
||||
|
||||
def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed", prune=True) -> 'Qube':
|
||||
def select(
|
||||
self,
|
||||
selection: dict[str, str | list[str]],
|
||||
mode: Literal["strict", "relaxed"] = "relaxed",
|
||||
prune=True,
|
||||
) -> "Qube":
|
||||
# make all values lists
|
||||
selection = {k : v if isinstance(v, list) else [v] for k,v in selection.items()}
|
||||
selection = {k: v if isinstance(v, list) else [v] for k, v in selection.items()}
|
||||
|
||||
def not_none(xs): return tuple(x for x in xs if x is not None)
|
||||
def not_none(xs):
|
||||
return tuple(x for x in xs if x is not None)
|
||||
|
||||
def select(node: Qube) -> Qube | None:
|
||||
# Check if the key is specified in the selection
|
||||
@ -241,7 +275,7 @@ class Qube:
|
||||
if prune and node.children and not new_children:
|
||||
return None
|
||||
|
||||
return dataclasses.replace(node, children = new_children)
|
||||
return dataclasses.replace(node, children=new_children)
|
||||
|
||||
# If the key is specified, check if any of the values match
|
||||
values = QEnum((c for c in selection[node.key] if c in node.values))
|
||||
@ -249,10 +283,14 @@ class Qube:
|
||||
if not values:
|
||||
return None
|
||||
|
||||
data = dataclasses.replace(node.data, values = values)
|
||||
return dataclasses.replace(node, data=data, children = not_none(select(c) for c in node.children))
|
||||
data = dataclasses.replace(node.data, values=values)
|
||||
return dataclasses.replace(
|
||||
node, data=data, children=not_none(select(c) for c in node.children)
|
||||
)
|
||||
|
||||
return dataclasses.replace(self, children = not_none(select(c) for c in self.children))
|
||||
return dataclasses.replace(
|
||||
self, children=not_none(select(c) for c in self.children)
|
||||
)
|
||||
|
||||
def span(self, key: str) -> list[str]:
|
||||
"""
|
||||
@ -279,8 +317,11 @@ class Qube:
|
||||
This hash takes into account the key, values and children's key values recursively.
|
||||
Because nodes are immutable, we only need to compute this once.
|
||||
"""
|
||||
|
||||
def hash_node(node: Qube) -> int:
|
||||
return hash((node.key, node.values, tuple(c.structural_hash for c in node.children)))
|
||||
return hash(
|
||||
(node.key, node.values, tuple(c.structural_hash for c in node.children))
|
||||
)
|
||||
|
||||
return hash_node(self)
|
||||
|
||||
|
@ -1 +1,3 @@
|
||||
from .Qube import Qube
|
||||
|
||||
__all__ = ["Qube"]
|
||||
|
@ -10,23 +10,29 @@ console = Console(stderr=True)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Generate a compressed tree from various inputs.")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Generate a compressed tree from various inputs."
|
||||
)
|
||||
|
||||
subparsers = parser.add_subparsers(title="subcommands", required=True)
|
||||
parser_convert = subparsers.add_parser('convert', help='Convert trees from one format to another.')
|
||||
parser_another = subparsers.add_parser('another_subcommand', help='Does something else')
|
||||
parser_convert = subparsers.add_parser(
|
||||
"convert", help="Convert trees from one format to another."
|
||||
)
|
||||
# parser_another = subparsers.add_parser(
|
||||
# "another_subcommand", help="Does something else"
|
||||
# )
|
||||
|
||||
parser_convert.add_argument(
|
||||
"--input",
|
||||
type=argparse.FileType("r"),
|
||||
default=sys.stdin,
|
||||
help="Specify the input file (default: standard input)."
|
||||
help="Specify the input file (default: standard input).",
|
||||
)
|
||||
parser_convert.add_argument(
|
||||
"--output",
|
||||
type=argparse.FileType("w"),
|
||||
default=sys.stdout,
|
||||
help="Specify the output file (default: standard output)."
|
||||
help="Specify the output file (default: standard output).",
|
||||
)
|
||||
|
||||
parser_convert.add_argument(
|
||||
@ -36,25 +42,26 @@ def main():
|
||||
help="""Specify the input format:
|
||||
fdb: the output of fdb list --porcelain
|
||||
mars: the output of mars list
|
||||
"""
|
||||
""",
|
||||
)
|
||||
|
||||
parser_convert.add_argument(
|
||||
"--output_format",
|
||||
choices=["text", "html"],
|
||||
default="text",
|
||||
help="Specify the output format (text or html)."
|
||||
help="Specify the output format (text or html).",
|
||||
)
|
||||
parser_convert.set_defaults(func=convert)
|
||||
|
||||
args = parser.parse_args()
|
||||
args.func(args)
|
||||
|
||||
|
||||
def convert(args):
|
||||
q = Qube.empty()
|
||||
for datacube in parse_fdb_list(args.input):
|
||||
new_branch = Qube.from_datacube(datacube)
|
||||
q = (q | Qube.from_datacube(datacube))
|
||||
q = q | Qube.from_datacube(datacube)
|
||||
|
||||
# output = match args.output_format:
|
||||
# case "text":
|
||||
@ -71,5 +78,6 @@ def convert(args):
|
||||
console.print(locals())
|
||||
console.print("FOO", style="white on blue")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
@ -1,16 +1,20 @@
|
||||
def parse_key_value_pairs(text: str):
|
||||
result = {}
|
||||
text = text.replace("}{", ",") # Replace segment separators
|
||||
text = text.replace("{", "").replace("}","").strip() # Remove leading/trailing braces
|
||||
text = (
|
||||
text.replace("{", "").replace("}", "").strip()
|
||||
) # Remove leading/trailing braces
|
||||
|
||||
for segment in text.split(","):
|
||||
if "=" not in segment: print(segment)
|
||||
if "=" not in segment:
|
||||
print(segment)
|
||||
key, values = segment.split("=", 1) # Ensure split only happens at first "="
|
||||
values = values.split("/")
|
||||
result[key] = values
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def parse_fdb_list(f):
|
||||
for line in f.readlines():
|
||||
# Handle fdb list normal
|
||||
|
@ -10,13 +10,17 @@ from .value_types import Values
|
||||
class NodeData:
|
||||
key: str
|
||||
values: Values
|
||||
metadata: dict[str, tuple[Hashable, ...]] = field(default_factory=frozendict, compare=False)
|
||||
metadata: dict[str, tuple[Hashable, ...]] = field(
|
||||
default_factory=frozendict, compare=False
|
||||
)
|
||||
|
||||
def summary(self) -> str:
|
||||
return f"{self.key}={self.values.summary()}" if self.key != "root" else "root"
|
||||
|
||||
|
||||
@dataclass(frozen=False, eq=True, order=True)
|
||||
class RootNodeData(NodeData):
|
||||
"Helper class to print a custom root name"
|
||||
|
||||
def summary(self) -> str:
|
||||
return self.key
|
@ -20,16 +20,31 @@ class SetOperation(Enum):
|
||||
DIFFERENCE = (1, 0, 0)
|
||||
SYMMETRIC_DIFFERENCE = (1, 0, 1)
|
||||
|
||||
def fused_set_operations(A: "Values", B: "Values") -> tuple[list[Values], list[Values], list[Values]]:
|
||||
|
||||
def fused_set_operations(
|
||||
A: "Values", B: "Values"
|
||||
) -> tuple[list[Values], list[Values], list[Values]]:
|
||||
if isinstance(A, QEnum) and isinstance(B, QEnum):
|
||||
set_A, set_B = set(A), set(B)
|
||||
intersection = set_A & set_B
|
||||
just_A = set_A - intersection
|
||||
just_B = set_B - intersection
|
||||
return [QEnum(just_A),], [QEnum(intersection),], [QEnum(just_B),]
|
||||
return (
|
||||
[
|
||||
QEnum(just_A),
|
||||
],
|
||||
[
|
||||
QEnum(intersection),
|
||||
],
|
||||
[
|
||||
QEnum(just_B),
|
||||
],
|
||||
)
|
||||
|
||||
raise NotImplementedError(
|
||||
"Fused set operations on values types other than QEnum are not yet implemented"
|
||||
)
|
||||
|
||||
raise NotImplementedError("Fused set operations on values types other than QEnum are not yet implemented")
|
||||
|
||||
def node_intersection(A: "Values", B: "Values") -> tuple[Values, Values, Values]:
|
||||
if isinstance(A, QEnum) and isinstance(B, QEnum):
|
||||
@ -39,17 +54,23 @@ def node_intersection(A: "Values", B: "Values") -> tuple[Values, Values, Values]
|
||||
just_B = set_B - intersection
|
||||
return QEnum(just_A), QEnum(intersection), QEnum(just_B)
|
||||
|
||||
raise NotImplementedError(
|
||||
"Fused set operations on values types other than QEnum are not yet implemented"
|
||||
)
|
||||
|
||||
raise NotImplementedError("Fused set operations on values types other than QEnum are not yet implemented")
|
||||
|
||||
def operation(A: "Qube", B : "Qube", operation_type: SetOperation, node_type) -> "Qube":
|
||||
assert A.key == B.key, "The two Qube root nodes must have the same key to perform set operations," \
|
||||
f"would usually be two root nodes. They have {A.key} and {B.key} respectively"
|
||||
def operation(A: "Qube", B: "Qube", operation_type: SetOperation, node_type) -> "Qube":
|
||||
assert A.key == B.key, (
|
||||
"The two Qube root nodes must have the same key to perform set operations,"
|
||||
f"would usually be two root nodes. They have {A.key} and {B.key} respectively"
|
||||
)
|
||||
|
||||
assert A.values == B.values, f"The two Qube root nodes must have the same values to perform set operations {A.values = }, {B.values = }"
|
||||
assert A.values == B.values, (
|
||||
f"The two Qube root nodes must have the same values to perform set operations {A.values = }, {B.values = }"
|
||||
)
|
||||
|
||||
# Group the children of the two nodes by key
|
||||
nodes_by_key = defaultdict(lambda : ([], []))
|
||||
nodes_by_key = defaultdict(lambda: ([], []))
|
||||
for node in A.children:
|
||||
nodes_by_key[node.key][0].append(node)
|
||||
for node in B.children:
|
||||
@ -59,7 +80,9 @@ def operation(A: "Qube", B : "Qube", operation_type: SetOperation, node_type) ->
|
||||
|
||||
# For every node group, perform the set operation
|
||||
for key, (A_nodes, B_nodes) in nodes_by_key.items():
|
||||
new_children.extend(_operation(key, A_nodes, B_nodes, operation_type, node_type))
|
||||
new_children.extend(
|
||||
_operation(key, A_nodes, B_nodes, operation_type, node_type)
|
||||
)
|
||||
|
||||
# Whenever we modify children we should recompress them
|
||||
# But since `operation` is already recursive, we only need to compress this level not all levels
|
||||
@ -71,7 +94,9 @@ def operation(A: "Qube", B : "Qube", operation_type: SetOperation, node_type) ->
|
||||
|
||||
|
||||
# The root node is special so we need a helper method that we can recurse on
|
||||
def _operation(key: str, A: list["Qube"], B : list["Qube"], operation_type: SetOperation, node_type) -> Iterable["Qube"]:
|
||||
def _operation(
|
||||
key: str, A: list["Qube"], B: list["Qube"], operation_type: SetOperation, node_type
|
||||
) -> Iterable["Qube"]:
|
||||
# We need to deal with the case where only one of the trees has this key.
|
||||
# To do so we can insert a dummy node with no children and no values into both A and B
|
||||
keep_just_A, keep_intersection, keep_just_B = operation_type.value
|
||||
@ -83,7 +108,6 @@ def _operation(key: str, A: list["Qube"], B : list["Qube"], operation_type: SetO
|
||||
|
||||
for node_a in A:
|
||||
for node_b in B:
|
||||
|
||||
# Compute A - B, A & B, B - A
|
||||
# Update the values for the two source nodes to remove the intersection
|
||||
just_a, intersection, just_b = node_intersection(
|
||||
@ -97,11 +121,14 @@ def _operation(key: str, A: list["Qube"], B : list["Qube"], operation_type: SetO
|
||||
|
||||
if keep_intersection:
|
||||
if intersection:
|
||||
new_node_a = replace(node_a, data = replace(node_a.data, values = intersection))
|
||||
new_node_b = replace(node_b, data= replace(node_b.data, values = intersection))
|
||||
new_node_a = replace(
|
||||
node_a, data=replace(node_a.data, values=intersection)
|
||||
)
|
||||
new_node_b = replace(
|
||||
node_b, data=replace(node_b.data, values=intersection)
|
||||
)
|
||||
yield operation(new_node_a, new_node_b, operation_type, node_type)
|
||||
|
||||
|
||||
# Now we've removed all the intersections we can yield the just_A and just_B parts if needed
|
||||
if keep_just_A:
|
||||
for node in A:
|
||||
@ -112,6 +139,7 @@ def _operation(key: str, A: list["Qube"], B : list["Qube"], operation_type: SetO
|
||||
if values[node]:
|
||||
yield node_type.make(key, values[node], node.children)
|
||||
|
||||
|
||||
def compress_children(children: Iterable["Qube"]) -> tuple["Qube"]:
|
||||
"""
|
||||
Helper method that only compresses a set of nodes, and doesn't do it recursively.
|
||||
@ -125,7 +153,7 @@ def compress_children(children: Iterable["Qube"]) -> tuple["Qube"]:
|
||||
key = hash((child.key, tuple((cc.structural_hash for cc in child.children))))
|
||||
identical_children[key].add(child)
|
||||
|
||||
# Now go through and create new compressed nodes for any groups that need collapsing
|
||||
# Now go through and create new compressed nodes for any groups that need collapsing
|
||||
new_children = []
|
||||
for child_set in identical_children.values():
|
||||
if len(child_set) > 1:
|
||||
@ -134,19 +162,23 @@ def compress_children(children: Iterable["Qube"]) -> tuple["Qube"]:
|
||||
key = child_set[0].key
|
||||
|
||||
# Compress the children into a single node
|
||||
assert all(isinstance(child.data.values, QEnum) for child in child_set), "All children must have QEnum values"
|
||||
assert all(isinstance(child.data.values, QEnum) for child in child_set), (
|
||||
"All children must have QEnum values"
|
||||
)
|
||||
|
||||
node_data = NodeData(
|
||||
key = key,
|
||||
metadata = frozendict(), # Todo: Implement metadata compression
|
||||
values = QEnum((v for child in child_set for v in child.data.values.values)),
|
||||
key=key,
|
||||
metadata=frozendict(), # Todo: Implement metadata compression
|
||||
values=QEnum(
|
||||
(v for child in child_set for v in child.data.values.values)
|
||||
),
|
||||
)
|
||||
new_child = node_type(data = node_data, children = child_set[0].children)
|
||||
new_child = node_type(data=node_data, children=child_set[0].children)
|
||||
else:
|
||||
# If the group is size one just keep it
|
||||
new_child = child_set.pop()
|
||||
|
||||
new_children.append(new_child)
|
||||
return tuple(sorted(new_children,
|
||||
key = lambda n : ((n.key, tuple(sorted(n.values.values))))
|
||||
))
|
||||
return tuple(
|
||||
sorted(new_children, key=lambda n: ((n.key, tuple(sorted(n.values.values)))))
|
||||
)
|
||||
|
@ -6,17 +6,24 @@ from typing import Iterable, Protocol, Sequence, runtime_checkable
|
||||
@runtime_checkable
|
||||
class TreeLike(Protocol):
|
||||
@property
|
||||
def children(self) -> Sequence["TreeLike"]: ... # Supports indexing like node.children[i]
|
||||
def children(
|
||||
self,
|
||||
) -> Sequence["TreeLike"]: ... # Supports indexing like node.children[i]
|
||||
|
||||
def summary(self) -> str: ...
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class HTML():
|
||||
class HTML:
|
||||
html: str
|
||||
|
||||
def _repr_html_(self):
|
||||
return self.html
|
||||
|
||||
def summarize_node(node: TreeLike, collapse = False, **kwargs) -> tuple[str, str, TreeLike]:
|
||||
|
||||
def summarize_node(
|
||||
node: TreeLike, collapse=False, **kwargs
|
||||
) -> tuple[str, str, TreeLike]:
|
||||
"""
|
||||
Extracts a summarized representation of the node while collapsing single-child paths.
|
||||
Returns the summary string and the last node in the chain that has multiple children.
|
||||
@ -40,7 +47,8 @@ def summarize_node(node: TreeLike, collapse = False, **kwargs) -> tuple[str, str
|
||||
|
||||
return ", ".join(summaries), ",".join(paths), node
|
||||
|
||||
def node_tree_to_string(node : TreeLike, prefix : str = "", depth = None) -> Iterable[str]:
|
||||
|
||||
def node_tree_to_string(node: TreeLike, prefix: str = "", depth=None) -> Iterable[str]:
|
||||
summary, path, node = summarize_node(node)
|
||||
|
||||
if depth is not None and depth <= 0:
|
||||
@ -49,7 +57,7 @@ def node_tree_to_string(node : TreeLike, prefix : str = "", depth = None) -> Ite
|
||||
# Special case for nodes with only a single child, this makes the printed representation more compact
|
||||
elif len(node.children) == 1:
|
||||
yield summary + ", "
|
||||
yield from node_tree_to_string(node.children[0], prefix, depth = depth)
|
||||
yield from node_tree_to_string(node.children[0], prefix, depth=depth)
|
||||
return
|
||||
else:
|
||||
yield summary + "\n"
|
||||
@ -58,9 +66,14 @@ def node_tree_to_string(node : TreeLike, prefix : str = "", depth = None) -> Ite
|
||||
connector = "└── " if index == len(node.children) - 1 else "├── "
|
||||
yield prefix + connector
|
||||
extension = " " if index == len(node.children) - 1 else "│ "
|
||||
yield from node_tree_to_string(child, prefix + extension, depth = depth - 1 if depth is not None else None)
|
||||
yield from node_tree_to_string(
|
||||
child, prefix + extension, depth=depth - 1 if depth is not None else None
|
||||
)
|
||||
|
||||
def _node_tree_to_html(node : TreeLike, prefix : str = "", depth = 1, connector = "", **kwargs) -> Iterable[str]:
|
||||
|
||||
def _node_tree_to_html(
|
||||
node: TreeLike, prefix: str = "", depth=1, connector="", **kwargs
|
||||
) -> Iterable[str]:
|
||||
summary, path, node = summarize_node(node, **kwargs)
|
||||
|
||||
if len(node.children) == 0:
|
||||
@ -73,15 +86,22 @@ def _node_tree_to_html(node : TreeLike, prefix : str = "", depth = 1, connector
|
||||
for index, child in enumerate(node.children):
|
||||
connector = "└── " if index == len(node.children) - 1 else "├── "
|
||||
extension = " " if index == len(node.children) - 1 else "│ "
|
||||
yield from _node_tree_to_html(child, prefix + extension, depth = depth - 1, connector = prefix+connector, **kwargs)
|
||||
yield from _node_tree_to_html(
|
||||
child,
|
||||
prefix + extension,
|
||||
depth=depth - 1,
|
||||
connector=prefix + connector,
|
||||
**kwargs,
|
||||
)
|
||||
yield "</details>"
|
||||
|
||||
def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
|
||||
css_id = f"qubed-tree-{random.randint(0, 1000000)}"
|
||||
|
||||
# It's ugly to use an f-string here because CSS uses {} so much, so instead
|
||||
# we use CSS_ID as a placeholder and replace it later
|
||||
css = """
|
||||
def node_tree_to_html(node: TreeLike, depth=1, **kwargs) -> str:
|
||||
css_id = f"qubed-tree-{random.randint(0, 1000000)}"
|
||||
|
||||
# It's ugly to use an f-string here because CSS uses {} so much, so instead
|
||||
# we use CSS_ID as a placeholder and replace it later
|
||||
css = """
|
||||
<style>
|
||||
pre#CSS_ID {
|
||||
font-family: monospace;
|
||||
@ -136,8 +156,8 @@ def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
|
||||
</style>
|
||||
""".replace("CSS_ID", css_id)
|
||||
|
||||
# This js snippet copies the path of a node to the clipboard when clicked
|
||||
js = """
|
||||
# This js snippet copies the path of a node to the clipboard when clicked
|
||||
js = """
|
||||
<script type="module" defer>
|
||||
async function nodeOnClick(event) {
|
||||
if (!event.altKey) return;
|
||||
@ -159,5 +179,5 @@ def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
|
||||
nodes.forEach(n => n.addEventListener("click", nodeOnClick));
|
||||
</script>
|
||||
""".replace("CSS_ID", css_id)
|
||||
nodes = "".join(_node_tree_to_html(node=node, depth=depth, **kwargs))
|
||||
return f"{js}{css}<pre class='qubed-tree' id='{css_id}'>{nodes}</pre>"
|
||||
nodes = "".join(_node_tree_to_html(node=node, depth=depth, **kwargs))
|
||||
return f"{js}{css}<pre class='qubed-tree' id='{css_id}'>{nodes}</pre>"
|
||||
|
@ -2,8 +2,9 @@ from dataclasses import dataclass, field
|
||||
|
||||
character = str
|
||||
|
||||
|
||||
@dataclass(unsafe_hash=True)
|
||||
class TrieNode():
|
||||
class TrieNode:
|
||||
parent: "TrieNode | None"
|
||||
parent_char: character
|
||||
children: dict[character, "TrieNode"] = field(default_factory=dict)
|
||||
@ -37,4 +38,3 @@ class Trie:
|
||||
leaf_node = leaf_node.parent
|
||||
|
||||
return "".join(reversed(string))
|
||||
|
||||
|
@ -7,11 +7,13 @@ from typing import TYPE_CHECKING, Any, FrozenSet, Iterable, Literal, TypeVar
|
||||
if TYPE_CHECKING:
|
||||
from .Qube import Qube
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Values(ABC):
|
||||
@abstractmethod
|
||||
def summary(self) -> str:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def __len__(self) -> int:
|
||||
pass
|
||||
@ -25,7 +27,7 @@ class Values(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def from_strings(self, values: Iterable[str]) -> list['Values']:
|
||||
def from_strings(self, values: Iterable[str]) -> list["Values"]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
@ -36,19 +38,22 @@ class Values(ABC):
|
||||
def to_json(self):
|
||||
pass
|
||||
|
||||
|
||||
T = TypeVar("T")
|
||||
EnumValuesType = FrozenSet[T]
|
||||
|
||||
|
||||
@dataclass(frozen=True, order=True)
|
||||
class QEnum(Values):
|
||||
"""
|
||||
The simplest kind of key value is just a list of strings.
|
||||
summary -> string1/string2/string....
|
||||
"""
|
||||
|
||||
values: EnumValuesType
|
||||
|
||||
def __init__(self, obj):
|
||||
object.__setattr__(self, 'values', frozenset(obj))
|
||||
object.__setattr__(self, "values", frozenset(obj))
|
||||
|
||||
def __post_init__(self):
|
||||
assert isinstance(self.values, tuple)
|
||||
@ -60,20 +65,28 @@ class QEnum(Values):
|
||||
return len(self.values)
|
||||
|
||||
def summary(self) -> str:
|
||||
return '/'.join(map(str, sorted(self.values)))
|
||||
return "/".join(map(str, sorted(self.values)))
|
||||
|
||||
def __contains__(self, value: Any) -> bool:
|
||||
return value in self.values
|
||||
def from_strings(self, values: Iterable[str]) -> list['Values']:
|
||||
|
||||
def from_strings(self, values: Iterable[str]) -> list["Values"]:
|
||||
return [type(self)(tuple(values))]
|
||||
|
||||
def min(self):
|
||||
return min(self.values)
|
||||
|
||||
def to_json(self):
|
||||
return list(self.values)
|
||||
|
||||
|
||||
class DateEnum(QEnum):
|
||||
def summary(self) -> str:
|
||||
def fmt(d): return d.strftime("%Y%m%d")
|
||||
return '/'.join(map(fmt, sorted(self.values)))
|
||||
def fmt(d):
|
||||
return d.strftime("%Y%m%d")
|
||||
|
||||
return "/".join(map(fmt, sorted(self.values)))
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Range(Values, ABC):
|
||||
@ -95,6 +108,7 @@ class Range(Values, ABC):
|
||||
def to_json(self):
|
||||
return dataclasses.asdict(self)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DateRange(Range):
|
||||
start: date
|
||||
@ -118,27 +132,34 @@ class DateRange(Range):
|
||||
return [DateEnum(dates)]
|
||||
|
||||
ranges = []
|
||||
current_group, dates = [dates[0],], dates[1:]
|
||||
current_type : Literal["enum", "range"] = "enum"
|
||||
current_group, dates = (
|
||||
[
|
||||
dates[0],
|
||||
],
|
||||
dates[1:],
|
||||
)
|
||||
current_type: Literal["enum", "range"] = "enum"
|
||||
while len(dates) > 1:
|
||||
if current_type == "range":
|
||||
|
||||
# If the next date fits then add it to the current range
|
||||
if dates[0] - current_group[-1] == timedelta(days=1):
|
||||
current_group.append(dates.pop(0))
|
||||
|
||||
|
||||
# Emit the current range and start a new one
|
||||
else:
|
||||
if len(current_group) == 1:
|
||||
ranges.append(DateEnum(current_group))
|
||||
else:
|
||||
ranges.append(DateRange(
|
||||
start=current_group[0],
|
||||
end=current_group[-1],
|
||||
step=timedelta(days=1)
|
||||
))
|
||||
current_group = [dates.pop(0),]
|
||||
ranges.append(
|
||||
DateRange(
|
||||
start=current_group[0],
|
||||
end=current_group[-1],
|
||||
step=timedelta(days=1),
|
||||
)
|
||||
)
|
||||
current_group = [
|
||||
dates.pop(0),
|
||||
]
|
||||
current_type = "enum"
|
||||
|
||||
if current_type == "enum":
|
||||
@ -156,11 +177,13 @@ class DateRange(Range):
|
||||
# Handle remaining `current_group`
|
||||
if current_group:
|
||||
if current_type == "range":
|
||||
ranges.append(DateRange(
|
||||
start=current_group[0],
|
||||
end=current_group[-1],
|
||||
step=timedelta(days=1)
|
||||
))
|
||||
ranges.append(
|
||||
DateRange(
|
||||
start=current_group[0],
|
||||
end=current_group[-1],
|
||||
step=timedelta(days=1),
|
||||
)
|
||||
)
|
||||
else:
|
||||
ranges.append(DateEnum(current_group))
|
||||
|
||||
@ -171,13 +194,18 @@ class DateRange(Range):
|
||||
return self.start <= v <= self.end and (v - self.start) % self.step == 0
|
||||
|
||||
def summary(self) -> str:
|
||||
def fmt(d): return d.strftime("%Y%m%d")
|
||||
def fmt(d):
|
||||
return d.strftime("%Y%m%d")
|
||||
|
||||
if self.step == timedelta(days=0):
|
||||
return f"{fmt(self.start)}"
|
||||
if self.step == timedelta(days=1):
|
||||
return f"{fmt(self.start)}/to/{fmt(self.end)}"
|
||||
|
||||
return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step // timedelta(days=1)}"
|
||||
return (
|
||||
f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step // timedelta(days=1)}"
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TimeRange(Range):
|
||||
@ -188,47 +216,51 @@ class TimeRange(Range):
|
||||
|
||||
def min(self):
|
||||
return self.start
|
||||
|
||||
def __iter__(self) -> Iterable[Any]:
|
||||
return super().__iter__()
|
||||
|
||||
@classmethod
|
||||
def from_strings(self, values: Iterable[str]) -> list['TimeRange']:
|
||||
def from_strings(self, values: Iterable[str]) -> list["TimeRange"]:
|
||||
times = sorted([int(v) for v in values])
|
||||
if len(times) < 2:
|
||||
return [TimeRange(
|
||||
start=times[0],
|
||||
end=times[0],
|
||||
step=100
|
||||
)]
|
||||
return [TimeRange(start=times[0], end=times[0], step=100)]
|
||||
|
||||
ranges = []
|
||||
current_range, times = [times[0],], times[1:]
|
||||
current_range, times = (
|
||||
[
|
||||
times[0],
|
||||
],
|
||||
times[1:],
|
||||
)
|
||||
while len(times) > 1:
|
||||
if times[0] - current_range[-1] == 1:
|
||||
current_range.append(times.pop(0))
|
||||
|
||||
elif len(current_range) == 1:
|
||||
ranges.append(TimeRange(
|
||||
start=current_range[0],
|
||||
end=current_range[0],
|
||||
step=0
|
||||
))
|
||||
current_range = [times.pop(0),]
|
||||
ranges.append(
|
||||
TimeRange(start=current_range[0], end=current_range[0], step=0)
|
||||
)
|
||||
current_range = [
|
||||
times.pop(0),
|
||||
]
|
||||
|
||||
else:
|
||||
ranges.append(TimeRange(
|
||||
start=current_range[0],
|
||||
end=current_range[-1],
|
||||
step=1
|
||||
))
|
||||
current_range = [times.pop(0),]
|
||||
ranges.append(
|
||||
TimeRange(start=current_range[0], end=current_range[-1], step=1)
|
||||
)
|
||||
current_range = [
|
||||
times.pop(0),
|
||||
]
|
||||
return ranges
|
||||
|
||||
def __len__(self) -> int:
|
||||
return (self.end - self.start) // self.step
|
||||
|
||||
def summary(self) -> str:
|
||||
def fmt(d): return f"{d:04d}"
|
||||
def fmt(d):
|
||||
return f"{d:04d}"
|
||||
|
||||
if self.step == 0:
|
||||
return f"{fmt(self.start)}"
|
||||
return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
|
||||
@ -237,6 +269,7 @@ class TimeRange(Range):
|
||||
v = int(value)
|
||||
return self.start <= v <= self.end and (v - self.start) % self.step == 0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class IntRange(Range):
|
||||
start: int
|
||||
@ -248,7 +281,9 @@ class IntRange(Range):
|
||||
return (self.end - self.start) // self.step
|
||||
|
||||
def summary(self) -> str:
|
||||
def fmt(d): return d
|
||||
def fmt(d):
|
||||
return d
|
||||
|
||||
if self.step == 0:
|
||||
return f"{fmt(self.start)}"
|
||||
return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
|
||||
@ -258,54 +293,62 @@ class IntRange(Range):
|
||||
return self.start <= v <= self.end and (v - self.start) % self.step == 0
|
||||
|
||||
@classmethod
|
||||
def from_strings(self, values: Iterable[str]) -> list['IntRange']:
|
||||
def from_strings(self, values: Iterable[str]) -> list["IntRange"]:
|
||||
ints = sorted([int(v) for v in values])
|
||||
if len(ints) < 2:
|
||||
return [IntRange(
|
||||
start=ints[0],
|
||||
end=ints[0],
|
||||
step=0
|
||||
)]
|
||||
return [IntRange(start=ints[0], end=ints[0], step=0)]
|
||||
|
||||
ranges = []
|
||||
current_range, ints = [ints[0],], ints[1:]
|
||||
current_range, ints = (
|
||||
[
|
||||
ints[0],
|
||||
],
|
||||
ints[1:],
|
||||
)
|
||||
while len(ints) > 1:
|
||||
if ints[0] - current_range[-1] == 1:
|
||||
current_range.append(ints.pop(0))
|
||||
|
||||
elif len(current_range) == 1:
|
||||
ranges.append(IntRange(
|
||||
start=current_range[0],
|
||||
end=current_range[0],
|
||||
step=0
|
||||
))
|
||||
current_range = [ints.pop(0),]
|
||||
ranges.append(
|
||||
IntRange(start=current_range[0], end=current_range[0], step=0)
|
||||
)
|
||||
current_range = [
|
||||
ints.pop(0),
|
||||
]
|
||||
|
||||
else:
|
||||
ranges.append(IntRange(
|
||||
start=current_range[0],
|
||||
end=current_range[-1],
|
||||
step=1
|
||||
))
|
||||
current_range = [ints.pop(0),]
|
||||
ranges.append(
|
||||
IntRange(start=current_range[0], end=current_range[-1], step=1)
|
||||
)
|
||||
current_range = [
|
||||
ints.pop(0),
|
||||
]
|
||||
return ranges
|
||||
|
||||
|
||||
def values_from_json(obj) -> Values:
|
||||
if isinstance(obj, list):
|
||||
return QEnum(tuple(obj))
|
||||
|
||||
match obj["dtype"]:
|
||||
case "date": return DateRange(**obj)
|
||||
case "time": return TimeRange(**obj)
|
||||
case "int": return IntRange(**obj)
|
||||
case _: raise ValueError(f"Unknown dtype {obj['dtype']}")
|
||||
case "date":
|
||||
return DateRange(**obj)
|
||||
case "time":
|
||||
return TimeRange(**obj)
|
||||
case "int":
|
||||
return IntRange(**obj)
|
||||
case _:
|
||||
raise ValueError(f"Unknown dtype {obj['dtype']}")
|
||||
|
||||
|
||||
def convert_datatypes(q: "Qube", conversions: dict[str, Values]) -> "Qube":
|
||||
def _convert(q: "Qube") -> Iterable["Qube"]:
|
||||
if q.key in conversions:
|
||||
data_type = conversions[q.key]
|
||||
assert isinstance(q.values, QEnum), "Only QEnum values can be converted to other datatypes."
|
||||
assert isinstance(q.values, QEnum), (
|
||||
"Only QEnum values can be converted to other datatypes."
|
||||
)
|
||||
for values_group in data_type.from_strings(q.values):
|
||||
# print(values_group)
|
||||
yield replace(q, data=replace(q.data, values=values_group))
|
||||
|
@ -20,7 +20,8 @@ app.add_middleware(
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
@app.get('/favicon.ico', include_in_schema=False)
|
||||
|
||||
@app.get("/favicon.ico", include_in_schema=False)
|
||||
async def favicon():
|
||||
return FileResponse("favicon.ico")
|
||||
|
||||
@ -32,16 +33,15 @@ if "LOCAL_CACHE" in os.environ:
|
||||
with open(base / "compressed_tree.json", "r") as f:
|
||||
json_tree = f.read()
|
||||
|
||||
|
||||
with open(base / "language.yaml", "r") as f:
|
||||
mars_language = yaml.safe_load(f)["_field"]
|
||||
|
||||
else:
|
||||
print("Getting cache from redis")
|
||||
r = redis.Redis(host="redis", port=6379, db=0)
|
||||
json_tree = r.get('compressed_catalog')
|
||||
json_tree = r.get("compressed_catalog")
|
||||
assert json_tree, "No compressed tree found in redis"
|
||||
mars_language = json.loads(r.get('mars_language'))
|
||||
mars_language = json.loads(r.get("mars_language"))
|
||||
|
||||
print("Loading tree from json")
|
||||
c_tree = CompressedTree.from_json(json.loads(json_tree))
|
||||
@ -51,6 +51,7 @@ tree = c_tree.reconstruct_compressed_ecmwf_style()
|
||||
|
||||
print("Ready to serve requests!")
|
||||
|
||||
|
||||
def request_to_dict(request: Request) -> Dict[str, Any]:
|
||||
# Convert query parameters to dictionary format
|
||||
request_dict = dict(request.query_params)
|
||||
@ -61,8 +62,10 @@ def request_to_dict(request: Request) -> Dict[str, Any]:
|
||||
|
||||
return request_dict
|
||||
|
||||
|
||||
def match_against_cache(request, tree):
|
||||
if not tree: return {"_END_" : {}}
|
||||
if not tree:
|
||||
return {"_END_": {}}
|
||||
matches = {}
|
||||
for k, subtree in tree.items():
|
||||
if len(k.split("=")) != 2:
|
||||
@ -71,13 +74,20 @@ def match_against_cache(request, tree):
|
||||
values = set(values.split(","))
|
||||
if key in request:
|
||||
if isinstance(request[key], list):
|
||||
matching_values = ",".join(request_value for request_value in request[key] if request_value in values)
|
||||
matching_values = ",".join(
|
||||
request_value
|
||||
for request_value in request[key]
|
||||
if request_value in values
|
||||
)
|
||||
if matching_values:
|
||||
matches[f"{key}={matching_values}"] = match_against_cache(request, subtree)
|
||||
matches[f"{key}={matching_values}"] = match_against_cache(
|
||||
request, subtree
|
||||
)
|
||||
elif request[key] in values:
|
||||
matches[f"{key}={request[key]}"] = match_against_cache(request, subtree)
|
||||
|
||||
if not matches: return {k : {} for k in tree.keys()}
|
||||
if not matches:
|
||||
return {k: {} for k in tree.keys()}
|
||||
return matches
|
||||
|
||||
|
||||
@ -87,33 +97,46 @@ def max_tree_depth(tree):
|
||||
return 0
|
||||
return 1 + max(max_tree_depth(v) for v in tree.values())
|
||||
|
||||
def prune_short_branches(tree, depth = None):
|
||||
|
||||
def prune_short_branches(tree, depth=None):
|
||||
if depth is None:
|
||||
depth = max_tree_depth(tree)
|
||||
return {k : prune_short_branches(v, depth-1) for k, v in tree.items() if max_tree_depth(v) == depth-1}
|
||||
return {
|
||||
k: prune_short_branches(v, depth - 1)
|
||||
for k, v in tree.items()
|
||||
if max_tree_depth(v) == depth - 1
|
||||
}
|
||||
|
||||
|
||||
def get_paths_to_leaves(tree):
|
||||
for k,v in tree.items():
|
||||
for k, v in tree.items():
|
||||
if not v:
|
||||
yield [k,]
|
||||
yield [
|
||||
k,
|
||||
]
|
||||
else:
|
||||
for leaf in get_paths_to_leaves(v):
|
||||
yield [k,] + leaf
|
||||
yield [
|
||||
k,
|
||||
] + leaf
|
||||
|
||||
|
||||
def get_leaves(tree):
|
||||
for k,v in tree.items():
|
||||
for k, v in tree.items():
|
||||
if not v:
|
||||
yield k
|
||||
else:
|
||||
for leaf in get_leaves(v):
|
||||
yield leaf
|
||||
|
||||
|
||||
@app.get("/api/tree")
|
||||
async def get_tree(request: Request):
|
||||
request_dict = request_to_dict(request)
|
||||
print(c_tree.multi_match(request_dict))
|
||||
return c_tree.multi_match(request_dict)
|
||||
|
||||
|
||||
@app.get("/api/match")
|
||||
async def get_match(request: Request):
|
||||
# Convert query parameters to dictionary format
|
||||
@ -122,7 +145,6 @@ async def get_match(request: Request):
|
||||
# Run the schema matching logic
|
||||
match_tree = match_against_cache(request_dict, tree)
|
||||
|
||||
|
||||
# Prune the tree to only include branches that are as deep as the deepest match
|
||||
# This means if you don't choose a certain branch at some point
|
||||
# the UI won't keep nagging you to choose a value for that branch
|
||||
@ -130,6 +152,7 @@ async def get_match(request: Request):
|
||||
|
||||
return match_tree
|
||||
|
||||
|
||||
@app.get("/api/paths")
|
||||
async def api_paths(request: Request):
|
||||
request_dict = request_to_dict(request)
|
||||
@ -137,11 +160,11 @@ async def api_paths(request: Request):
|
||||
match_tree = prune_short_branches(match_tree)
|
||||
paths = get_paths_to_leaves(match_tree)
|
||||
|
||||
|
||||
# deduplicate leaves based on the key
|
||||
by_path = defaultdict(lambda : {"paths" : set(), "values" : set()})
|
||||
by_path = defaultdict(lambda: {"paths": set(), "values": set()})
|
||||
for p in paths:
|
||||
if p[-1] == "_END_": continue
|
||||
if p[-1] == "_END_":
|
||||
continue
|
||||
key, values = p[-1].split("=")
|
||||
values = values.split(",")
|
||||
path = tuple(p[:-1])
|
||||
@ -149,66 +172,75 @@ async def api_paths(request: Request):
|
||||
by_path[key]["values"].update(values)
|
||||
by_path[key]["paths"].add(tuple(path))
|
||||
|
||||
return [{
|
||||
return [
|
||||
{
|
||||
"paths": list(v["paths"]),
|
||||
"key": key,
|
||||
"values": sorted(v["values"], reverse=True),
|
||||
} for key, v in by_path.items()]
|
||||
}
|
||||
for key, v in by_path.items()
|
||||
]
|
||||
|
||||
|
||||
@app.get("/api/stac")
|
||||
async def get_STAC(request: Request):
|
||||
request_dict = request_to_dict(request)
|
||||
paths = await api_paths(request)
|
||||
|
||||
|
||||
def make_link(key_name, paths, values):
|
||||
"""Take a MARS Key and information about which paths matched up to this point and use it to make a STAC Link"""
|
||||
path = paths[0]
|
||||
href_template = f"/stac?{'&'.join(path)}{'&' if path else ''}{key_name}={{}}"
|
||||
optional = [False]
|
||||
optional_str = "Yes" if all(optional) and len(optional) > 0 else ("Sometimes" if any(optional) else "No")
|
||||
# optional_str = (
|
||||
# "Yes"
|
||||
# if all(optional) and len(optional) > 0
|
||||
# else ("Sometimes" if any(optional) else "No")
|
||||
# )
|
||||
values_from_mars_language = mars_language.get(key_name, {}).get("values", [])
|
||||
|
||||
# values = [v[0] if isinstance(v, list) else v for v in values_from_mars_language]
|
||||
|
||||
if all(isinstance(v, list) for v in values_from_mars_language):
|
||||
value_descriptions_dict = {k : v[-1]
|
||||
for v in values_from_mars_language
|
||||
if len(v) > 1
|
||||
for k in v[:-1]}
|
||||
value_descriptions_dict = {
|
||||
k: v[-1]
|
||||
for v in values_from_mars_language
|
||||
if len(v) > 1
|
||||
for k in v[:-1]
|
||||
}
|
||||
value_descriptions = [value_descriptions_dict.get(v, "") for v in values]
|
||||
if not any(value_descriptions): value_descriptions = None
|
||||
if not any(value_descriptions):
|
||||
value_descriptions = None
|
||||
|
||||
return {
|
||||
"title": key_name,
|
||||
"generalized_datacube:href_template": href_template,
|
||||
"rel": "child",
|
||||
"type": "application/json",
|
||||
"generalized_datacube:dimension" : {
|
||||
"type" : mars_language.get(key_name, {}).get("type", ""),
|
||||
"description": mars_language.get(key_name, {}).get("description", ""),
|
||||
"values" : values,
|
||||
"value_descriptions" : value_descriptions,
|
||||
"optional" : any(optional),
|
||||
"multiple": True,
|
||||
"paths" : paths,
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
"title": key_name,
|
||||
"generalized_datacube:href_template": href_template,
|
||||
"rel": "child",
|
||||
"type": "application/json",
|
||||
"generalized_datacube:dimension": {
|
||||
"type": mars_language.get(key_name, {}).get("type", ""),
|
||||
"description": mars_language.get(key_name, {}).get("description", ""),
|
||||
"values": values,
|
||||
"value_descriptions": value_descriptions,
|
||||
"optional": any(optional),
|
||||
"multiple": True,
|
||||
"paths": paths,
|
||||
},
|
||||
}
|
||||
|
||||
def value_descriptions(key, values):
|
||||
return {
|
||||
v[0] : v[-1] for v in mars_language.get(key, {}).get("values", [])
|
||||
v[0]: v[-1]
|
||||
for v in mars_language.get(key, {}).get("values", [])
|
||||
if len(v) > 1 and v[0] in list(values)
|
||||
}
|
||||
|
||||
descriptions = {
|
||||
key : {
|
||||
"key" : key,
|
||||
"values" : values,
|
||||
"description" : mars_language.get(key, {}).get("description", ""),
|
||||
"value_descriptions" : value_descriptions(key,values),
|
||||
key: {
|
||||
"key": key,
|
||||
"values": values,
|
||||
"description": mars_language.get(key, {}).get("description", ""),
|
||||
"value_descriptions": value_descriptions(key, values),
|
||||
}
|
||||
for key, values in request_dict.items()
|
||||
}
|
||||
@ -219,15 +251,12 @@ async def get_STAC(request: Request):
|
||||
"stac_version": "1.0.0",
|
||||
"id": "partial-matches",
|
||||
"description": "STAC collection representing potential children of this request",
|
||||
"links": [
|
||||
make_link(p["key"], p["paths"], p["values"])
|
||||
for p in paths
|
||||
],
|
||||
"links": [make_link(p["key"], p["paths"], p["values"]) for p in paths],
|
||||
"debug": {
|
||||
"request": request_dict,
|
||||
"descriptions": descriptions,
|
||||
"paths" : paths,
|
||||
}
|
||||
"paths": paths,
|
||||
},
|
||||
}
|
||||
|
||||
return stac_collection
|
@ -9,4 +9,4 @@ with data_path.open("r") as f:
|
||||
|
||||
compressed_tree = compressed_tree.guess_datatypes()
|
||||
|
||||
compressed_tree.print(depth = 10)
|
||||
compressed_tree.print(depth=10)
|
||||
|
@ -5,12 +5,15 @@ from tree_traverser import CompressedTree, RefcountedDict
|
||||
|
||||
class CompressedTreeFixed(CompressedTree):
|
||||
@classmethod
|
||||
def from_json(cls, data : dict):
|
||||
def from_json(cls, data: dict):
|
||||
c = cls({})
|
||||
c.cache = {}
|
||||
ca = data["cache"]
|
||||
for k, v in ca.items():
|
||||
g = {k2 : ca[str(v2)]["dict"][k2] if k2 in ca[str(v2)]["dict"] else v2 for k2, v2 in v["dict"].items()}
|
||||
g = {
|
||||
k2: ca[str(v2)]["dict"][k2] if k2 in ca[str(v2)]["dict"] else v2
|
||||
for k2, v2 in v["dict"].items()
|
||||
}
|
||||
c.cache[int(k)] = RefcountedDict(g)
|
||||
c.cache[int(k)].refcount = v["refcount"]
|
||||
|
||||
@ -20,11 +23,16 @@ class CompressedTreeFixed(CompressedTree):
|
||||
|
||||
def reconstruct(self, max_depth=None) -> dict[str, dict]:
|
||||
"Reconstruct the tree as a normal nested dictionary"
|
||||
def reconstruct_node(h : int, depth : int) -> dict[str, dict]:
|
||||
|
||||
def reconstruct_node(h: int, depth: int) -> dict[str, dict]:
|
||||
if max_depth is not None and depth > max_depth:
|
||||
return {}
|
||||
return {k : reconstruct_node(v, depth=depth+1) for k, v in self.cache[h].items()}
|
||||
return reconstruct_node(self.root_hash, depth = 0)
|
||||
return {
|
||||
k: reconstruct_node(v, depth=depth + 1)
|
||||
for k, v in self.cache[h].items()
|
||||
}
|
||||
|
||||
return reconstruct_node(self.root_hash, depth=0)
|
||||
|
||||
|
||||
data_path = Path("data/compressed_tree_climate_dt.json")
|
||||
@ -39,5 +47,6 @@ output_data_path = Path("data/compressed_tree_climate_dt_ecmwf_style.json")
|
||||
|
||||
compressed_tree.save(output_data_path)
|
||||
|
||||
print(f"climate dt compressed tree ecmwf style: {output_data_path.stat().st_size // 1e6:.1f} MB")
|
||||
|
||||
print(
|
||||
f"climate dt compressed tree ecmwf style: {output_data_path.stat().st_size // 1e6:.1f} MB"
|
||||
)
|
||||
|
@ -5,15 +5,15 @@ from tqdm import tqdm
|
||||
from pathlib import Path
|
||||
import json
|
||||
from more_itertools import chunked
|
||||
|
||||
process = psutil.Process()
|
||||
|
||||
|
||||
def massage_request(r):
|
||||
return {k : v if isinstance(v, list) else [v]
|
||||
for k, v in r.items()}
|
||||
return {k: v if isinstance(v, list) else [v] for k, v in r.items()}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
config = """
|
||||
---
|
||||
type: remote
|
||||
@ -24,10 +24,10 @@ store: remote
|
||||
"""
|
||||
|
||||
request = {
|
||||
"class": "d1",
|
||||
"dataset": "climate-dt",
|
||||
# "date": "19920420",
|
||||
}
|
||||
"class": "d1",
|
||||
"dataset": "climate-dt",
|
||||
# "date": "19920420",
|
||||
}
|
||||
|
||||
data_path = Path("data/compressed_tree_climate_dt.json")
|
||||
if not data_path.exists():
|
||||
@ -35,7 +35,7 @@ store: remote
|
||||
else:
|
||||
compressed_tree = CompressedTree.load(data_path)
|
||||
|
||||
fdb = backend.PyFDB(fdb_config = config)
|
||||
fdb = backend.PyFDB(fdb_config=config)
|
||||
|
||||
visited_path = Path("data/visited_dates.json")
|
||||
if not visited_path.exists():
|
||||
@ -46,13 +46,15 @@ store: remote
|
||||
|
||||
today = datetime.datetime.today()
|
||||
start = datetime.datetime.strptime("19920420", "%Y%m%d")
|
||||
date_list = [start + datetime.timedelta(days=x) for x in range((today - start).days)]
|
||||
date_list = [
|
||||
start + datetime.timedelta(days=x) for x in range((today - start).days)
|
||||
]
|
||||
date_list = [d.strftime("%Y%m%d") for d in date_list if d not in visited_dates]
|
||||
for dates in chunked(tqdm(date_list), 5):
|
||||
print(dates[0])
|
||||
print(f"Memory usage: {(process.memory_info().rss)/1e6:.1f} MB")
|
||||
print(f"Memory usage: {(process.memory_info().rss) / 1e6:.1f} MB")
|
||||
|
||||
r = request | dict(date = dates)
|
||||
r = request | dict(date=dates)
|
||||
tree = fdb.traverse_fdb(massage_request(r))
|
||||
|
||||
compressed_tree.insert_tree(tree)
|
||||
|
@ -1,113 +1,156 @@
|
||||
from qubed import Qube
|
||||
|
||||
d = {
|
||||
"class=od" : {
|
||||
"expver=0001": {"param=1":{}, "param=2":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
"class=od": {
|
||||
"expver=0001": {"param=1": {}, "param=2": {}},
|
||||
"expver=0002": {"param=1": {}, "param=2": {}},
|
||||
},
|
||||
"class=rd" : {
|
||||
"expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
"class=rd": {
|
||||
"expver=0001": {"param=1": {}, "param=2": {}, "param=3": {}},
|
||||
"expver=0002": {"param=1": {}, "param=2": {}},
|
||||
},
|
||||
}
|
||||
q = Qube.from_dict(d)
|
||||
|
||||
|
||||
def test_eq():
|
||||
r = Qube.from_dict(d)
|
||||
assert q == r
|
||||
|
||||
|
||||
def test_getitem():
|
||||
assert q["class", "od"] == Qube.from_dict({
|
||||
"expver=0001": {"param=1":{}, "param=2":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
})
|
||||
assert q["class", "od"]["expver", "0001"] == Qube.from_dict({
|
||||
"param=1":{}, "param=2":{},
|
||||
})
|
||||
assert q["class", "od"] == Qube.from_dict(
|
||||
{
|
||||
"expver=0001": {"param=1": {}, "param=2": {}},
|
||||
"expver=0002": {"param=1": {}, "param=2": {}},
|
||||
}
|
||||
)
|
||||
assert q["class", "od"]["expver", "0001"] == Qube.from_dict(
|
||||
{
|
||||
"param=1": {},
|
||||
"param=2": {},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def test_n_leaves():
|
||||
q = Qube.from_dict({
|
||||
"a=1/2/3" : {"b=1/2/3" : {"c=1/2/3" : {}}},
|
||||
"a=5" : { "b=4" : { "c=4" : {}}}
|
||||
})
|
||||
q = Qube.from_dict(
|
||||
{"a=1/2/3": {"b=1/2/3": {"c=1/2/3": {}}}, "a=5": {"b=4": {"c=4": {}}}}
|
||||
)
|
||||
|
||||
# Size is 3*3*3 + 1*1*1 = 27 + 1
|
||||
assert q.n_leaves == 27 + 1
|
||||
|
||||
|
||||
def test_n_leaves_empty():
|
||||
assert Qube.empty().n_leaves == 0
|
||||
|
||||
|
||||
def test_n_nodes_empty():
|
||||
assert Qube.empty().n_nodes == 0
|
||||
|
||||
|
||||
def test_union():
|
||||
q = Qube.from_dict({"a=1/2/3" : {"b=1" : {}},})
|
||||
r = Qube.from_dict({"a=2/3/4" : {"b=2" : {}},})
|
||||
q = Qube.from_dict(
|
||||
{
|
||||
"a=1/2/3": {"b=1": {}},
|
||||
}
|
||||
)
|
||||
r = Qube.from_dict(
|
||||
{
|
||||
"a=2/3/4": {"b=2": {}},
|
||||
}
|
||||
)
|
||||
|
||||
u = Qube.from_dict({
|
||||
"a=4" : {"b=2" : {}},
|
||||
"a=1" : {"b=1" : {}},
|
||||
"a=2/3" : {"b=1/2" : {}},
|
||||
|
||||
})
|
||||
u = Qube.from_dict(
|
||||
{
|
||||
"a=4": {"b=2": {}},
|
||||
"a=1": {"b=1": {}},
|
||||
"a=2/3": {"b=1/2": {}},
|
||||
}
|
||||
)
|
||||
|
||||
assert q | r == u
|
||||
|
||||
|
||||
def test_union_with_empty():
|
||||
q = Qube.from_dict({"a=1/2/3" : {"b=1" : {}},})
|
||||
assert q | Qube.empty() == q
|
||||
q = Qube.from_dict(
|
||||
{
|
||||
"a=1/2/3": {"b=1": {}},
|
||||
}
|
||||
)
|
||||
assert q | Qube.empty() == q
|
||||
|
||||
|
||||
def test_union_2():
|
||||
q = Qube.from_datacube({
|
||||
"class": "d1",
|
||||
"dataset": ["climate-dt", "another-value"],
|
||||
'generation': ['1', "2", "3"],
|
||||
})
|
||||
|
||||
r = Qube.from_datacube({
|
||||
"class": "d1",
|
||||
"dataset": ["weather-dt", "climate-dt"],
|
||||
'generation': ['1', "2", "3", "4"],
|
||||
})
|
||||
|
||||
u = Qube.from_dict({
|
||||
"class=d1" : {
|
||||
"dataset=climate-dt/weather-dt" : {
|
||||
"generation=1/2/3/4" : {},
|
||||
},
|
||||
"dataset=another-value" : {
|
||||
"generation=1/2/3" : {},
|
||||
},
|
||||
q = Qube.from_datacube(
|
||||
{
|
||||
"class": "d1",
|
||||
"dataset": ["climate-dt", "another-value"],
|
||||
"generation": ["1", "2", "3"],
|
||||
}
|
||||
})
|
||||
)
|
||||
|
||||
r = Qube.from_datacube(
|
||||
{
|
||||
"class": "d1",
|
||||
"dataset": ["weather-dt", "climate-dt"],
|
||||
"generation": ["1", "2", "3", "4"],
|
||||
}
|
||||
)
|
||||
|
||||
u = Qube.from_dict(
|
||||
{
|
||||
"class=d1": {
|
||||
"dataset=climate-dt/weather-dt": {
|
||||
"generation=1/2/3/4": {},
|
||||
},
|
||||
"dataset=another-value": {
|
||||
"generation=1/2/3": {},
|
||||
},
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
assert q | r == u
|
||||
|
||||
|
||||
def test_difference():
|
||||
q = Qube.from_dict({"a=1/2/3/5" : {"b=1" : {}},})
|
||||
r = Qube.from_dict({"a=2/3/4" : {"b=1" : {}},})
|
||||
q = Qube.from_dict(
|
||||
{
|
||||
"a=1/2/3/5": {"b=1": {}},
|
||||
}
|
||||
)
|
||||
r = Qube.from_dict(
|
||||
{
|
||||
"a=2/3/4": {"b=1": {}},
|
||||
}
|
||||
)
|
||||
|
||||
i = Qube.from_dict({
|
||||
"a=1/5" : {"b=1" : {}},
|
||||
|
||||
})
|
||||
i = Qube.from_dict(
|
||||
{
|
||||
"a=1/5": {"b=1": {}},
|
||||
}
|
||||
)
|
||||
|
||||
assert q - r == i
|
||||
|
||||
|
||||
def test_order_independence():
|
||||
u = Qube.from_dict({
|
||||
"a=4" : {"b=2" : {}},
|
||||
"a=1" : {"b=2" : {}, "b=1" : {}},
|
||||
"a=2/3" : {"b=1/2" : {}},
|
||||
u = Qube.from_dict(
|
||||
{
|
||||
"a=4": {"b=2": {}},
|
||||
"a=1": {"b=2": {}, "b=1": {}},
|
||||
"a=2/3": {"b=1/2": {}},
|
||||
}
|
||||
)
|
||||
|
||||
})
|
||||
|
||||
v = Qube.from_dict({
|
||||
"a=2/3" : {"b=1/2" : {}},
|
||||
"a=4" : {"b=2" : {}},
|
||||
"a=1" : {"b=1" : {}, "b=2" : {}},
|
||||
})
|
||||
v = Qube.from_dict(
|
||||
{
|
||||
"a=2/3": {"b=1/2": {}},
|
||||
"a=4": {"b=2": {}},
|
||||
"a=1": {"b=1": {}, "b=2": {}},
|
||||
}
|
||||
)
|
||||
|
||||
assert u == v
|
@ -2,28 +2,32 @@ from qubed import Qube
|
||||
|
||||
|
||||
def test_smoke():
|
||||
q = Qube.from_dict({
|
||||
"class=od" : {
|
||||
"expver=0001": {"param=1":{}, "param=2":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
},
|
||||
"class=rd" : {
|
||||
"expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
},
|
||||
})
|
||||
q = Qube.from_dict(
|
||||
{
|
||||
"class=od": {
|
||||
"expver=0001": {"param=1": {}, "param=2": {}},
|
||||
"expver=0002": {"param=1": {}, "param=2": {}},
|
||||
},
|
||||
"class=rd": {
|
||||
"expver=0001": {"param=1": {}, "param=2": {}, "param=3": {}},
|
||||
"expver=0002": {"param=1": {}, "param=2": {}},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# root
|
||||
# ├── class=od, expver=0001/0002, param=1/2
|
||||
# └── class=rd
|
||||
# ├── expver=0001, param=1/2/3
|
||||
# └── expver=0002, param=1/2
|
||||
ct = Qube.from_dict({
|
||||
"class=od" : {"expver=0001/0002": {"param=1/2":{}}},
|
||||
"class=rd" : {
|
||||
"expver=0001": {"param=1/2/3":{}},
|
||||
"expver=0002": {"param=1/2":{}},
|
||||
},
|
||||
})
|
||||
ct = Qube.from_dict(
|
||||
{
|
||||
"class=od": {"expver=0001/0002": {"param=1/2": {}}},
|
||||
"class=rd": {
|
||||
"expver=0001": {"param=1/2/3": {}},
|
||||
"expver=0002": {"param=1/2": {}},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
assert q.compress() == ct
|
||||
assert q.compress() == ct
|
||||
|
@ -2,15 +2,17 @@ from qubed import Qube
|
||||
|
||||
|
||||
def test_json_round_trip():
|
||||
u = Qube.from_dict({
|
||||
"class=d1" : {
|
||||
"dataset=climate-dt/weather-dt" : {
|
||||
"generation=1/2/3/4" : {},
|
||||
},
|
||||
"dataset=another-value" : {
|
||||
"generation=1/2/3" : {},
|
||||
},
|
||||
u = Qube.from_dict(
|
||||
{
|
||||
"class=d1": {
|
||||
"dataset=climate-dt/weather-dt": {
|
||||
"generation=1/2/3/4": {},
|
||||
},
|
||||
"dataset=another-value": {
|
||||
"generation=1/2/3": {},
|
||||
},
|
||||
}
|
||||
}
|
||||
})
|
||||
)
|
||||
json = u.to_json()
|
||||
assert Qube.from_json(json) == u
|
@ -1,18 +1,18 @@
|
||||
from qubed import Qube
|
||||
|
||||
d = {
|
||||
"class=od" : {
|
||||
"expver=0001": {"param=1":{}, "param=2":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
"class=od": {
|
||||
"expver=0001": {"param=1": {}, "param=2": {}},
|
||||
"expver=0002": {"param=1": {}, "param=2": {}},
|
||||
},
|
||||
"class=rd" : {
|
||||
"expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
"class=rd": {
|
||||
"expver=0001": {"param=1": {}, "param=2": {}, "param=3": {}},
|
||||
"expver=0002": {"param=1": {}, "param=2": {}},
|
||||
},
|
||||
}
|
||||
q = Qube.from_dict(d).compress()
|
||||
|
||||
as_string= """
|
||||
as_string = """
|
||||
root
|
||||
├── class=od, expver=0001/0002, param=1/2
|
||||
└── class=rd
|
||||
@ -24,8 +24,10 @@ as_html = """
|
||||
<details open data-path="root"><summary class="qubed-node">root</summary><span class="qubed-node leaf" data-path="class=od,expver=0001/0002,param=1/2">├── class=od, expver=0001/0002, param=1/2</span><details open data-path="class=rd"><summary class="qubed-node">└── class=rd</summary><span class="qubed-node leaf" data-path="expver=0001,param=1/2/3"> ├── expver=0001, param=1/2/3</span><span class="qubed-node leaf" data-path="expver=0002,param=1/2"> └── expver=0002, param=1/2</span></details></details>
|
||||
""".strip()
|
||||
|
||||
|
||||
def test_string():
|
||||
assert str(q).strip() == as_string
|
||||
|
||||
|
||||
def test_html():
|
||||
assert as_html in q._repr_html_()
|
@ -3,17 +3,16 @@ from qubed import Qube
|
||||
|
||||
|
||||
def test_iter_leaves_simple():
|
||||
def make_hashable(l):
|
||||
for d in l:
|
||||
def make_hashable(list_like):
|
||||
for d in list_like:
|
||||
yield frozendict(d)
|
||||
q = Qube.from_dict({
|
||||
"a=1/2" : {"b=1/2" : {}}
|
||||
})
|
||||
|
||||
q = Qube.from_dict({"a=1/2": {"b=1/2": {}}})
|
||||
entries = [
|
||||
{"a" : '1', "b" : '1'},
|
||||
{"a" : '1', "b" : '2'},
|
||||
{"a" : '2', "b" : '1'},
|
||||
{"a" : '2', "b" : '2'},
|
||||
{"a": "1", "b": "1"},
|
||||
{"a": "1", "b": "2"},
|
||||
{"a": "2", "b": "1"},
|
||||
{"a": "2", "b": "2"},
|
||||
]
|
||||
|
||||
assert set(make_hashable(q.leaves())) == set(make_hashable(entries))
|
@ -1,19 +1,22 @@
|
||||
|
||||
from qubed import Qube
|
||||
|
||||
|
||||
def test_leaf_conservation():
|
||||
q = Qube.from_dict({
|
||||
"class=d1": {"dataset=climate-dt" : {
|
||||
"time=0000": {"param=130/134/137/146/147/151/165/166/167/168/169" : {}},
|
||||
"time=0001": {"param=130": {}},
|
||||
}}})
|
||||
q = Qube.from_dict(
|
||||
{
|
||||
"class=d1": {
|
||||
"dataset=climate-dt": {
|
||||
"time=0000": {
|
||||
"param=130/134/137/146/147/151/165/166/167/168/169": {}
|
||||
},
|
||||
"time=0001": {"param=130": {}},
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
r = Qube.from_datacube({
|
||||
"class": "d1",
|
||||
"dataset": "climate-dt",
|
||||
"time": "0001",
|
||||
"param": "134"
|
||||
})
|
||||
r = Qube.from_datacube(
|
||||
{"class": "d1", "dataset": "climate-dt", "time": "0001", "param": "134"}
|
||||
)
|
||||
|
||||
assert q.n_leaves + r.n_leaves == (q | r).n_leaves
|
@ -2,28 +2,32 @@ from qubed import Qube
|
||||
|
||||
|
||||
def test_smoke():
|
||||
q = Qube.from_dict({
|
||||
"class=od" : {
|
||||
"expver=0001": {"param=1":{}, "param=2":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
},
|
||||
"class=rd" : {
|
||||
"expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
|
||||
"expver=0002": {"param=1":{}, "param=2":{}},
|
||||
},
|
||||
})
|
||||
q = Qube.from_dict(
|
||||
{
|
||||
"class=od": {
|
||||
"expver=0001": {"param=1": {}, "param=2": {}},
|
||||
"expver=0002": {"param=1": {}, "param=2": {}},
|
||||
},
|
||||
"class=rd": {
|
||||
"expver=0001": {"param=1": {}, "param=2": {}, "param=3": {}},
|
||||
"expver=0002": {"param=1": {}, "param=2": {}},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# root
|
||||
# ├── class=od, expver=0001/0002, param=1/2
|
||||
# └── class=rd
|
||||
# ├── expver=0001, param=1/2/3
|
||||
# └── expver=0002, param=1/2
|
||||
ct = Qube.from_dict({
|
||||
"class=od" : {"expver=0001/0002": {"param=1/2":{}}},
|
||||
"class=rd" : {
|
||||
"expver=0001": {"param=1/2/3":{}},
|
||||
"expver=0002": {"param=1/2":{}},
|
||||
},
|
||||
})
|
||||
ct = Qube.from_dict(
|
||||
{
|
||||
"class=od": {"expver=0001/0002": {"param=1/2": {}}},
|
||||
"class=rd": {
|
||||
"expver=0001": {"param=1/2/3": {}},
|
||||
"expver=0002": {"param=1/2": {}},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
assert q.compress() == ct
|
||||
assert q.compress() == ct
|
||||
|
@ -18,13 +18,16 @@ CORS(app, resources={r"/api/*": {"origins": "*"}})
|
||||
# This line tells flask to look at HTTP headers set by the TLS proxy to figure out what the original
|
||||
# Traffic looked like.
|
||||
# See https://flask.palletsprojects.com/en/3.0.x/deploying/proxy_fix/
|
||||
app.wsgi_app = ProxyFix(
|
||||
app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1
|
||||
)
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1)
|
||||
|
||||
config = {}
|
||||
|
||||
|
||||
@app.route("/")
|
||||
def index():
|
||||
return render_template("index.html", request = request, config = config, api_url = os.environ.get("API_URL", "/api/stac"))
|
||||
|
||||
return render_template(
|
||||
"index.html",
|
||||
request=request,
|
||||
config=config,
|
||||
api_url=os.environ.get("API_URL", "/api/stac"),
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user