Update stac server and frontend

This commit is contained in:
Tom 2025-03-28 17:50:29 +00:00
parent 275831d186
commit 79b97fd813
12 changed files with 287 additions and 219 deletions

1
.gitignore vendored
View File

@ -19,3 +19,4 @@ src/python/qubed/_version.py
*.ipynb
cmake_build/
tests/data/
*.secret

View File

@ -242,6 +242,17 @@ class Qube:
else:
yield leaf
def leaf_nodes(self) -> "Iterable[tuple[dict[str, str], Qube]]":
    """Yield a ``(request, node)`` pair for every leaf reachable from this node.

    ``request`` maps each key on the path to the single value chosen for it,
    and ``node`` is the childless node the path ends at. One pair is produced
    per value of every node along the path. A node whose key is ``"root"``
    forwards the pairs of its children without adding its own key.
    """
    is_root = self.key == "root"
    for value in self.values:
        if not self.children:
            # This node is itself a leaf: emit one request per value.
            yield ({self.key: value}, self)
            continue
        for child in self.children:
            for sub_request, sub_node in child.leaf_nodes():
                if is_root:
                    yield (sub_request, sub_node)
                else:
                    yield ({self.key: value, **sub_request}, sub_node)
def leaves_with_metadata(
self, indices=()
) -> Iterable[tuple[dict[str, str], dict[str, str]]]:
@ -375,35 +386,54 @@ class Qube:
if consume and not node.children and selection:
return None
# Check if the key is specified in the selection
# If the key isn't in the selection then what we do depends on the mode:
# In strict mode we just stop here
# In next_level mode we include the next level down so you can tell what keys to add next
# In relaxed mode we skip the key if it's not in the request and carry on
if node.key not in selection:
if mode == "strict":
return None
new_children = not_none(select(c, selection) for c in node.children)
# If this node doesn't exist in the
elif mode == "next_level":
return node.replace(
children=(),
metadata=self.metadata | {"is_leaf": not bool(self.children)},
)
# prune==true then remove any non-leaf nodes
# which have had all their children removed
if prune and node.children and not new_children:
elif mode == "relaxed":
pass
else:
raise ValueError(f"Unknown mode argument {mode}")
# If the key IS in the selection then check if the values match
if node.key in selection:
# If the key is specified, check if any of the values match
selection_criteria = selection[node.key]
if isinstance(selection_criteria, Callable):
values = QEnum((c for c in node.values if selection_criteria(c)))
else:
values = QEnum((c for c in selection[node.key] if c in node.values))
# Here modes don't matter because we've explicitly filtered on this key and found nothing
if not values:
return None
return node.replace(children=new_children)
# If the key is specified, check if any of the values match
selection_criteria = selection[node.key]
if isinstance(selection_criteria, Callable):
values = QEnum((c for c in node.values if selection_criteria(c)))
else:
values = QEnum((c for c in selection[node.key] if c in node.values))
if not values:
return None
node = node.replace(values=values)
if consume:
selection = {k: v for k, v in selection.items() if k != node.key}
# Prune nodes that have had all their children pruned
new_children = not_none(select(c, selection) for c in node.children)
# if node.key == "dataset": print(prune, [(c.key, c.values.values) for c in node.children], [c.key for c in new_children])
if prune and node.children and not new_children:
return None
return node.replace(
values=values,
children=not_none(select(c, selection) for c in node.children),
children=new_children,
metadata=self.metadata | {"is_leaf": not bool(new_children)},
)
return self.replace(

View File

@ -22,7 +22,7 @@ class HTML:
def summarize_node(
node: TreeLike, collapse=False, **kwargs
node: TreeLike, collapse=False, max_summary_length=50, **kwargs
) -> tuple[str, str, TreeLike]:
"""
Extracts a summarized representation of the node while collapsing single-child paths.
@ -33,9 +33,11 @@ def summarize_node(
while True:
summary = node.summary(**kwargs)
if "is_leaf" in node.metadata and node.metadata["is_leaf"]:
summary += "🌿"
paths.append(summary)
if len(summary) > 50:
summary = summary[:50] + "..."
if len(summary) > max_summary_length:
summary = summary[:max_summary_length] + "..."
summaries.append(summary)
if not collapse:
break
@ -96,8 +98,11 @@ def _node_tree_to_html(
yield "</details>"
def node_tree_to_html(node: TreeLike, depth=1, **kwargs) -> str:
css_id = f"qubed-tree-{random.randint(0, 1000000)}"
def node_tree_to_html(
node: TreeLike, depth=1, include_css=True, include_js=True, css_id=None, **kwargs
) -> str:
if css_id is None:
css_id = f"qubed-tree-{random.randint(0, 1000000)}"
# It's ugly to use an f string here because css uses {} so much so instead
# we use CSS_ID as a placeholder and replace it later
@ -180,4 +185,4 @@ def node_tree_to_html(node: TreeLike, depth=1, **kwargs) -> str:
</script>
""".replace("CSS_ID", css_id)
nodes = "".join(_node_tree_to_html(node=node, depth=depth, **kwargs))
return f"{js}{css}<pre class='qubed-tree' id='{css_id}'>{nodes}</pre>"
return f"{js if include_js else ''}{css if include_css else ''}<pre class='qubed-tree' id='{css_id}'>{nodes}</pre>"

View File

@ -1,17 +1,19 @@
import json
import os
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict
import redis
import requests
import yaml
from fastapi import FastAPI, Request
from fastapi import Depends, FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from tree_traverser import CompressedTree
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from frozendict import frozendict
from qubed import Qube
from qubed.tree_formatters import node_tree_to_html
app = FastAPI()
security = HTTPBearer()
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
@ -20,39 +22,50 @@ app.add_middleware(
allow_headers=["*"],
)
@app.get("/favicon.ico", include_in_schema=False)
async def favicon():
    """Return a ``FileResponse`` for the relative path ``favicon.ico``.

    The route is registered with ``include_in_schema=False``.
    """
    icon_response = FileResponse("favicon.ico")
    return icon_response
qubes: dict[str, Qube] = {}
print("Getting climate and extremes dt data from github")
qubes["climate-dt"] = Qube.from_json(
requests.get(
"https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json"
).json()
)
qubes["extremes-dt"] = Qube.from_json(
requests.get(
"https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/extremes_dt.json"
).json()
)
mars_language = yaml.safe_load(
requests.get(
"https://github.com/ecmwf/qubed/raw/refs/heads/main/config/climate-dt/language.yaml"
).content
)
if "LOCAL_CACHE" in os.environ:
print("Getting data from local file")
base = Path(os.environ["LOCAL_CACHE"])
with open(base / "compressed_tree.json", "r") as f:
json_tree = f.read()
with open(base / "language.yaml", "r") as f:
mars_language = yaml.safe_load(f)["_field"]
if "API_KEY" in os.environ:
print("Getting data from local file")
else:
print("Getting cache from redis")
r = redis.Redis(host="redis", port=6379, db=0)
json_tree = r.get("compressed_catalog")
assert json_tree, "No compressed tree found in redis"
mars_language = json.loads(r.get("mars_language"))
print("Loading tree from json")
c_tree = CompressedTree.from_json(json.loads(json_tree))
print("Partialy decompressing tree, shoud be able to skip this step in future.")
tree = c_tree.reconstruct_compressed_ecmwf_style()
# Load the shared secret that write endpoints compare Bearer tokens against.
with open("api_key.secret", "r") as f:
    # Strip surrounding whitespace: secret files usually end with a newline,
    # and a key containing one could never equal a token sent in a header.
    api_key = f.read().strip()
print("Ready to serve requests!")
def request_to_dict(request: Request) -> Dict[str, Any]:
def validate_key(key: str):
    """Return ``key`` unchanged if a qube is registered under it.

    Raises:
        HTTPException: with status 404 when ``key`` is not in ``qubes``.
    """
    if key in qubes:
        return key
    raise HTTPException(status_code=404, detail=f"Qube {key} not found")
async def get_body_json(request: Request):
    """Await ``request.json()`` and return the result."""
    body = await request.json()
    return body
def parse_request(request: Request) -> dict[str, str | list[str]]:
# Convert query parameters to dictionary format
request_dict = dict(request.query_params)
for key, value in request_dict.items():
@ -63,116 +76,54 @@ def request_to_dict(request: Request) -> Dict[str, Any]:
return request_dict
def match_against_cache(request, tree):
    """Recursively match a request against a nested ``"key=v1,v2"`` tree.

    Each key of ``tree`` has the form ``"<key>=<comma separated values>"`` and
    maps to a subtree of the same shape.

    Returns:
        * ``{"_END_": {}}`` when ``tree`` is empty.
        * A dict of the branches whose key is in ``request`` and which share at
          least one value with it. The branch label is narrowed to the matching
          values, in request order, and the subtree is matched recursively.
        * When no branch matches, every key of ``tree`` mapped to ``{}``.

    Raises:
        ValueError: if a tree key does not contain exactly one ``=``.
    """
    if not tree:
        return {"_END_": {}}

    matches = {}
    for k, subtree in tree.items():
        parts = k.split("=")
        if len(parts) != 2:
            raise ValueError(f"Key {k} is not in the correct format")
        key, joined_values = parts
        allowed = set(joined_values.split(","))

        if key not in request:
            continue

        wanted = request[key]
        if isinstance(wanted, list):
            selected = ",".join(v for v in wanted if v in allowed)
            if selected:
                matches[f"{key}={selected}"] = match_against_cache(request, subtree)
        elif wanted in allowed:
            matches[f"{key}={wanted}"] = match_against_cache(request, subtree)

    if matches:
        return matches
    # Nothing matched at this level: expose the available branches instead.
    return {k: {} for k in tree.keys()}
def validate_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Check the Bearer token of a request against the configured API key.

    Args:
        credentials: Credentials resolved from the ``security`` dependency.

    Returns:
        The same ``credentials`` object when the token matches ``api_key``.

    Raises:
        HTTPException: with status 403 when the token does not match.
    """
    import secrets

    # Compare in constant time so response timing does not reveal how much of
    # the key a guess got right. Both sides are encoded to bytes because
    # compare_digest only accepts str arguments that are pure ASCII.
    supplied = credentials.credentials.encode("utf-8")
    expected = api_key.encode("utf-8")
    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(status_code=403, detail="Incorrect API Key")
    return credentials
def max_tree_depth(tree):
    """Return the number of levels in the deepest branch of a nested dict.

    An empty (falsy) tree has depth 0.
    """
    return 1 + max(map(max_tree_depth, tree.values())) if tree else 0
@app.get("/favicon.ico", include_in_schema=False)
async def favicon():
    """Return a ``FileResponse`` for the relative path ``favicon.ico``.

    The route is registered with ``include_in_schema=False``.
    """
    icon_response = FileResponse("favicon.ico")
    return icon_response
def prune_short_branches(tree, depth=None):
    """Keep only the branches of ``tree`` that reach the full ``depth``.

    Args:
        tree: Nested dict of dicts.
        depth: Required depth of ``tree``. Defaults to ``max_tree_depth(tree)``,
            i.e. the depth of its deepest branch.

    Returns:
        A new dict holding only the children whose own maximum depth is
        ``depth - 1``, each pruned recursively against that depth.
    """
    target = max_tree_depth(tree) if depth is None else depth
    child_depth = target - 1

    pruned = {}
    for name, subtree in tree.items():
        if max_tree_depth(subtree) == child_depth:
            pruned[name] = prune_short_branches(subtree, child_depth)
    return pruned
@app.get("/api/v1/keys/")
async def keys():
    """Return the names under which qubes are currently stored, as a list."""
    return list(qubes)
def get_paths_to_leaves(tree):
    """Yield, for every leaf of a nested dict, the list of keys leading to it.

    A leaf is a key whose value is empty (falsy). Each yielded path starts at
    a top-level key and ends with the leaf key.
    """
    for name, subtree in tree.items():
        if subtree:
            for tail in get_paths_to_leaves(subtree):
                yield [name, *tail]
        else:
            yield [name]
@app.get("/api/v1/get/{key}/")
async def get(
    key: str = Depends(validate_key),
    request: dict[str, str | list[str]] = Depends(parse_request),
):
    """Return the JSON form of the whole qube stored under ``key``.

    ``key`` is resolved through ``validate_key``. ``request`` is resolved
    through ``parse_request`` but is not used in the body.
    """
    selected_qube = qubes[key]
    return selected_qube.to_json()
def get_leaves(tree):
    """Yield the key of every leaf in a nested dict, depth first.

    A leaf is a key whose value is empty (falsy).
    """
    for name, subtree in tree.items():
        if subtree:
            yield from get_leaves(subtree)
        else:
            yield name
@app.post("/api/v1/union/{key}/")
async def union(
    key: str,
    credentials: HTTPAuthorizationCredentials = Depends(validate_api_key),
    body_json=Depends(get_body_json),
):
    """Merge the qube in the request body into the one stored under ``key``.

    If nothing is stored under ``key`` yet, an empty qube is created first.
    The body is parsed with ``Qube.from_json`` and combined with the stored
    qube using ``|``. The result replaces the stored qube and is returned as
    JSON. ``credentials`` is resolved through ``validate_api_key``.
    """
    if key not in qubes:
        qubes[key] = Qube.empty()

    incoming = Qube.from_json(body_json)
    merged = qubes[key] | incoming
    qubes[key] = merged
    return merged.to_json()
@app.get("/api/tree")
async def get_tree(request: Request):
    """Return the result of ``c_tree.multi_match`` for the request's query parameters.

    The query parameters are converted with ``request_to_dict``. The match is
    computed once, printed, and returned.
    """
    request_dict = request_to_dict(request)
    # Compute the match a single time rather than once to print and once to return.
    matched = c_tree.multi_match(request_dict)
    print(matched)
    return matched
@app.get("/api/match")
async def get_match(request: Request):
    """Match the request's query parameters against ``tree`` and prune the result."""
    # Query parameters -> dictionary form.
    query = request_to_dict(request)
    # Schema matching against the cached tree.
    matched = match_against_cache(query, tree)
    # Keep only the branches that are as deep as the deepest match, so that a
    # branch the user did not follow does not keep prompting the UI for a value.
    return prune_short_branches(matched)
@app.get("/api/paths")
async def api_paths(request: Request):
request_dict = request_to_dict(request)
match_tree = match_against_cache(request_dict, tree)
match_tree = prune_short_branches(match_tree)
paths = get_paths_to_leaves(match_tree)
# deduplicate leaves based on the key
def follow_query(request: dict[str, str | list[str]], qube: Qube):
s = qube.select(request, mode="next_level", prune=True, consume=False)
by_path = defaultdict(lambda: {"paths": set(), "values": set()})
for p in paths:
if p[-1] == "_END_":
continue
key, values = p[-1].split("=")
values = values.split(",")
path = tuple(p[:-1])
by_path[key]["values"].update(values)
by_path[key]["paths"].add(tuple(path))
for request, node in s.leaf_nodes():
if not node.data.metadata["is_leaf"]:
by_path[node.key]["values"].update(node.values.values)
by_path[node.key]["paths"].add(frozendict(request))
return [
return s, [
{
"paths": list(v["paths"]),
"key": key,
@ -182,25 +133,28 @@ async def api_paths(request: Request):
]
@app.get("/api/stac")
async def get_STAC(request: Request):
request_dict = request_to_dict(request)
paths = await api_paths(request)
@app.get("/api/v1/query/{key}")
async def query(
    key: str = Depends(validate_key),
    request: dict[str, str | list[str]] = Depends(parse_request),
):
    """Return the second element (``paths``) of ``follow_query`` for the qube under ``key``.

    ``key`` is resolved through ``validate_key`` and ``request`` through
    ``parse_request``.
    """
    _selection, paths = follow_query(request, qubes[key])
    return paths
@app.get("/api/v1/stac/{key}/")
async def get_STAC(
key: str = Depends(validate_key),
request: dict[str, str | list[str]] = Depends(parse_request),
):
qube, paths = follow_query(request, qubes[key])
def make_link(key_name, paths, values):
"""Take a MARS Key and information about which paths matched up to this point and use it to make a STAC Link"""
path = paths[0]
href_template = f"/stac?{'&'.join(path)}{'&' if path else ''}{key_name}={{}}"
optional = [False]
# optional_str = (
# "Yes"
# if all(optional) and len(optional) > 0
# else ("Sometimes" if any(optional) else "No")
# )
values_from_mars_language = mars_language.get(key_name, {}).get("values", [])
# values = [v[0] if isinstance(v, list) else v for v in values_from_mars_language]
if all(isinstance(v, list) for v in values_from_mars_language):
value_descriptions_dict = {
k: v[-1]
@ -214,17 +168,19 @@ async def get_STAC(request: Request):
return {
"title": key_name,
"generalized_datacube:href_template": href_template,
"uriTemplate": href_template,
"rel": "child",
"type": "application/json",
"generalized_datacube:dimension": {
"type": mars_language.get(key_name, {}).get("type", ""),
"description": mars_language.get(key_name, {}).get("description", ""),
"values": values,
"value_descriptions": value_descriptions,
"optional": any(optional),
"multiple": True,
"paths": paths,
"variables": {
key: {
"type": "string",
"description": mars_language.get(key_name, {}).get(
"description", ""
),
"enum": values,
"value_descriptions": value_descriptions,
# "paths": paths,
}
},
}
@ -242,7 +198,7 @@ async def get_STAC(request: Request):
"description": mars_language.get(key, {}).get("description", ""),
"value_descriptions": value_descriptions(key, values),
}
for key, values in request_dict.items()
for key, values in request.items()
}
# Format the response as a STAC collection
@ -253,9 +209,18 @@ async def get_STAC(request: Request):
"description": "STAC collection representing potential children of this request",
"links": [make_link(p["key"], p["paths"], p["values"]) for p in paths],
"debug": {
"request": request_dict,
# "request": request,
"descriptions": descriptions,
"paths": paths,
# "paths": paths,
"qube": node_tree_to_html(
qube.compress(),
collapse=True,
depth=10,
include_css=False,
include_js=False,
max_summary_length=200,
css_id="qube",
),
},
}

View File

@ -1,3 +1,4 @@
fastapi[standard]
pe
redis
frozendict

View File

@ -7,18 +7,36 @@
- **Extension [Maturity Classification](https://github.com/radiantearth/stac-spec/tree/master/extensions/README.md#extension-maturity):** Proposal
- **Owner**: @TomHodson
This STAC extension allows for representation of [generalised datacubes][gen_datacubes].
This STAC extension borrows the [Draft OGC Records API](https://docs.ogc.org/DRAFTS/20-004.html), specifically the [templated links section](https://docs.ogc.org/DRAFTS/20-004.html#sc_templated_links_with_variables) to give STAC the ability to index very large datasets that conform to a generalised datacube model.
A datacube has a fixed set of dimensions `[a, b, c..]` , each of which has a fixed span `{a: ["temp","rainfall"], b : [1-7], c:[True, False]}` such that we can access data by indexing, i.e. providing a value for each axis, `a="rainfall", b=1, ...`. A generalised datacube allows the dimensions to change during indexing, so choosing `a="rainfall"` might yield a different set of axes from `a="temp"`.
A typical datacube has a fixed set of dimensions `[a, b, c..]` , each of which has a fixed span `{a: ["temp","rainfall"], b : [1-7], c:[True, False]}` such that we can access data by indexing, i.e. providing a value for each axis, `a="rainfall", b=1, ...`. A generalized datacube, by our definition, allows the dimensions to change during indexing, so choosing `a="rainfall"` might yield a different set of axes from `a="temp"`.
The [STAC Datacube][datacube_extension] extension serves the needs of datacubes that appear in STAC as Items or Collections, i.e. as leaves in the tree. This extension instead focuses on allowing STAC to serve as an interface to dynamically explore the branches of generalised datacubes. It does this by adding additional metadata to the children of Catalog entries.
The [STAC Datacube][datacube_extension] extension serves the needs of datacubes that appear in STAC as Items or Collections, i.e. as leaves in the tree. This extension instead focuses on allowing STAC to serve as an interface to dynamically explore the branches of generalised datacubes. It does this by adding additional metadata from the OGC Records standard to the children of Catalog entries.
We take the *Dimension Objects* defined by the [Datacube Extension][datacube_extension] and add them to [Link objects][link_objects] under the key `generalized_datacube:dimension`. This enables a single Link Object to represent a whole axis and its allowed values. Since `href` must now be constructed dynamically, we remove it and add a `generalized_datacube:href_template` attribute to communicate how to construct the URLs corresponding to a particular choice of value or values.
In practice what this proposal does is:
In order to support more complex slicing operations in which multiple indices may be selected for a given dimension, we also add additional optional attributes to all *Dimension Objects*. These are:
1. Replace `"links":` with `"linkTemplates":` in the Catalog entry following the example of the OGC Records API.
2. To each `rel: Child` object in `linkTemplates`:
a. Add a `variables` key following the OGC Records API with a list of entries like:
```json
"format": {
"type": "string",
"enum": [
"application/vnd.google-earth.kml+xml",
"application/vnd.google-earth.kmz",
"image/png",
"image/jpeg",
"image/gif",
"image/png; mode=8bit",
"application/x-pdf",
"image/svg+xml",
"image/tiff"
]
}
```
b. Add a "uriTemplate" key that specifies how to construct the resulting URL, e.g. `http://hostname.tld/app/index.html?class=od&format={}`
* `optional` : bool whether this dimension can be skipped.
* `multiple` : bool, whether multiple values can be selected for this key.
This enables a child object to represent a whole axis and its allowed values. Since `href` must now be constructed dynamically, we remove it and add a `generalized_datacube:href_template` attribute to communicate how to construct the URLs corresponding to a particular choice of value or values.
[gen_datacubes]: https://github.com/ecmwf/datacube-spec
[link_objects]: https://github.com/radiantearth/stac-spec/blob/master/commons/links.md#link-object
@ -34,19 +52,20 @@ A typical `Catalog` entry with this extension:
"id": "rainfall",
"stac_version": "1.0.0",
"description": "ECMWF's Operational Data Archive",
"links": [
"linkTemplates": [
{
"title": "Expver - Experiment Version",
"generalized_datacube:href_template": "http://136.156.129.226/app/index.html?class=od&expver={}",
"rel": "child",
"title": "Expver - Experiment Version",
"uriTemplate": "http://hostname.tld/app/index.html?class=od&expver={}",
"type": "application/json",
"generalized_datacube:dimension" : {
"type" : "enum",
"description": "Experiment version, 0001 selects operational data.",
"values" : ["0001", "xxxx"],
"value_descriptions" : ["Operational Data", "Experimental Data"],
"optional" : false,
"multiple": true,
"variables" : {
"expver" : {
"description": "Experiment version, 0001 selects operational data.",
"type" : "string",
"enum" : ["0001", "xxxx"],
"value_descriptions" : ["Operational Data", "Experimental Data"],
"optional" : false,
}
}
""

View File

@ -29,5 +29,5 @@ def index():
"index.html",
request=request,
config=config,
api_url=os.environ.get("API_URL", "/api/stac"),
api_url=os.environ.get("API_URL", "/api/v1/stac"),
)

View File

@ -1,2 +1,2 @@
export API_URL="http://127.0.0.1:8124/api/stac"
export API_URL="http://127.0.0.1:8124/api/v1/stac/climate-dt"
flask run --debug --port=5006

View File

@ -138,39 +138,23 @@ async function createCatalogItem(link, itemsContainer) {
// Update the item div with real content
itemDiv.classList.remove("loading");
const dimension = link["generalized_datacube:dimension"];
const variables = link["variables"];
const key = Object.keys(variables)[0];
const variable = variables[key];
// add data-key attribute to the itemDiv
itemDiv.dataset.key = link.title;
itemDiv.dataset.keyType = dimension.type;
itemDiv.dataset.keyType = variable.type;
itemDiv.innerHTML = `
<h3 class="item-title">${link.title || "No title available"}</h3>
<p class="item-type">Key Type: ${itemDiv.dataset.keyType || "Unknown"}</p>
<!-- <p class="item-type">Paths: ${dimension.paths}</p> -->
<p class="item-type">Optional: ${dimension.optional ? "Yes" : "No"}</p>
<p class="item-description">${
dimension.description
? dimension.description.slice(0, 100)
: "No description available"
}...</p>
variable.description ? variable.description.slice(0, 100) : ""
}</p>
`;
// if (dimension.type === "date" || dimension.type === "time") {
// // Render a date picker for the "date" key
// const picker = `<input type="${link.title}" name="${link.title}">`;
// //convert picker to HTML node
// const pickerNode = document
// .createRange()
// .createContextualFragment(picker);
// itemDiv.appendChild(pickerNode);
// }
// Otherwise create a scrollable list with checkboxes for values if available
if (
// dimension.type === "enum" &&
dimension.values &&
dimension.values.length > 0
) {
if (variable.enum && variable.enum.length > 0) {
const listContainer = renderCheckboxList(link);
itemDiv.appendChild(listContainer);
} else {
@ -185,14 +169,16 @@ async function createCatalogItem(link, itemsContainer) {
}
function renderCheckboxList(link) {
const dimension = link["generalized_datacube:dimension"];
const value_descriptions = dimension.value_descriptions || [];
const variables = link["variables"];
const key = Object.keys(variables)[0];
const variable = variables[key];
const value_descriptions = variable.value_descriptions || [];
const listContainerHTML = `
<div class="item-list-container">
<label class="list-label">Select one or more values:</label>
<div class="scrollable-list">
${dimension.values
${variable.enum
.map((value, index) => {
const labelText = value_descriptions[index]
? `${value} - ${value_descriptions[index]}`
@ -201,7 +187,7 @@ function renderCheckboxList(link) {
<div class="checkbox-container">
<label class="checkbox-label">
<input type="checkbox" class="item-checkbox" value="${value}" ${
dimension.values.length === 1 ? "checked" : ""
variable.enum.length === 1 ? "checked" : ""
}>
${labelText}
</label>
@ -268,8 +254,10 @@ function renderRawSTACResponse(catalog) {
itemDetails.textContent = JSON.stringify(just_stac, null, 2);
const debug_container = document.getElementById("debug");
// create new object without debug key
debug_container.textContent = JSON.stringify(catalog.debug, null, 2);
const qube_container = document.getElementById("qube");
qube_container.innerHTML = catalog.debug.qube;
}
// Fetch STAC catalog and display items

View File

@ -0,0 +1,50 @@
/* Styles for the qube tree shown inside <pre id="qube">.
   Child rules use native CSS nesting, so they only apply within that element. */
pre#qube {
    white-space: pre;
    /* Single font stack; it already ends with the generic monospace fallback. */
    font-family: SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;
    font-size: 12px;
    line-height: 1.4;

    details {
        margin-left: 0;
    }

    .qubed-node a {
        margin-left: 10px;
        text-decoration: none;
    }

    /* Each summary is one non-wrapping line, truncated with an ellipsis. */
    summary {
        list-style: none;
        cursor: pointer;
        text-overflow: ellipsis;
        overflow: hidden;
        text-wrap: nowrap;
        display: block;
    }

    summary:hover,span.leaf:hover {
        background-color: #f0f0f0;
    }

    /* Custom open/closed indicators appended after the summary text. */
    details > summary::after {
        content: ' ▲';
    }

    details:not([open]) > summary::after {
        content: " ▼";
    }

    .leaf {
        text-overflow: ellipsis;
        overflow: hidden;
        text-wrap: nowrap;
        display: block;
    }

    /* Hide the built-in disclosure marker in WebKit-based browsers. */
    summary::-webkit-details-marker {
        display: none;
        content: "";
    }
}

View File

@ -209,3 +209,7 @@ span.value:hover {
width: 100%;
}
}
details h2 {
font-size: medium;
}

View File

@ -5,6 +5,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>ECMWF DestinE STAC Viewer</title>
<link rel="stylesheet" href="/static/styles.css" />
<link rel="stylesheet" href="/static/qube_styles.css" />
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/json.min.js"></script>
@ -38,8 +39,12 @@
}
</code></pre>
<!-- Container to show the current tree -->
<h2>Currently Selected Tree</h2>
<pre id = "qube"></pre>
<!-- Container for the raw STAC response -->
<details open>
<details>
<summary><h2>Raw STAC Response</h2></summary>
<p>See the <a href="https://github.com/ecmwf-projects/catalogs/blob/main/structured_stac.md">STAC Extension Proposal</a> for more details on the format.</p>
<pre class="json-pre"><code id="raw-stac" class="language-json"></code></pre>