From e00a5ff9199958ec2e7527ee44a4462096656350 Mon Sep 17 00:00:00 2001 From: Tom Hodson Date: Thu, 21 Nov 2024 13:56:18 +0000 Subject: [PATCH] add stac_servver --- stac_server/favicon.ico | Bin 0 -> 15406 bytes stac_server/main.py | 253 +++++++++++++++++++++++++++++++++++ stac_server/requirements.txt | 3 + stac_server/run.sh | 3 + 4 files changed, 259 insertions(+) create mode 100644 stac_server/favicon.ico create mode 100644 stac_server/main.py create mode 100644 stac_server/requirements.txt create mode 100755 stac_server/run.sh diff --git a/stac_server/favicon.ico b/stac_server/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..3663761885fecbef48b1c75784c3b759b77c102a GIT binary patch literal 15406 zcmeHOd2Ae48DCRc3KUU}ic&QYAV3w6NJ|AsC5WR!DlL@|h^iG5mD27yEfP)Q-2^I< zNt1YI*PGZm?Ck8?-o*AE9$)d9IF9Yy2TmO4axMpyliYV`nkE^3->jcK`*vn`cD#xC zBbG*Q=gs@x_xry0z3;vEz0WpVj_o1aQ$ z`29OJ+vX>1wr9`=B%p}bqsYqN9M|OU3QqgZSx)tR0L?ls(Kb~VKJ<;;{ zV~X-;oM808oj@BdcYiZ1iHC_sF7+y_NM1!7>BEh1sFubgc1wHQ(?WUH} zAv#9S%AWRROrE)nj+NlO1&?xZf14rw72v3bTqGl-V`U`ZxztoyWJt>}tcoor40mNg zc!beXiMsPEDhkumw?$R@+uhv)tbO#%(v(72POgfhmY|yA2C$A z>^DA`I_cSaiNTQfke_wb-7Eg-c#|HWmqYi0zZu@~ad32In?LcM<+4jd<2cE zzSLA3_+C@ge|r9$Jar$F#M^|c-%7JqxJp#@4L0j3a_@v0i`MCSF{++>KrnQ zdVE{M3ze-a%auKA!|Fr(hE>%|6zHGFpvdL(6~fRM^Y6o0grI|cGpFY|*!UQg;OpRd zx64bE6FXN7t($Dmz{)dwnw7P~>*x@cno!~CRyp*f5ih>Jve-QivfR+}-wqj#)_umG zmHq3Z3hk9N^mijc_b+)qM(^0e#Wb|aHt3r5ZMNLEE~4d6zk6xjGvdkWWl8)RWdE4) zZ^2k!JFLIoNr$DY&Yy^3X?h(*CHEA@o619*<&0%#xc;-NDUrV<`k9d_1Mo^<;&=L zJL2aO)+g;@885vO?M|__G?!UFR!ep+VtvBCKd;A^)nR?v!P>S(7LAxc>5%n@@_M>C z81;)G);@mTT4@;r`71gju>LT%o0*(OCFVZP2E%^5ud~uJ|A8$v8r~w-2ajOHm(|mm zJS=%GF${DzzR+)c<}|VkIukcl7Har=ZioHeZ!1l+$*$4~EjO!EF3{1Y%RJdKD2bkvk6YidiB z=BgsBx5GBe+Mv9R`nUD^Hd_}hbw7)K4ruz+*px0$_Wg9MM109CpH%HA_7juD8NhHl z9S-RGDs0D>&d;XeXY{RY(V|b${u*{sgr^f&_TuTs-g|v~U+#XLkEa>+^`Kr~0~9Ag 
zh}QNdp3#nz0D_gj9AQT8*x4?-bC&b%ZHTcP7o3ibGhOy+g6plP={#(u)B8a~*kIr)q1Rc&Uv{blI(Us{Z*e#5?MMh$Qxki2!C^oznA#Z^$iRSW^}D>LwbkJcV-JaozbfN`QxJ{`4g-C z;?KeX?}X(`^1qZ0JADT((|(f409)&tWsfk;n$5(@#2-Y0p1HuiTh@3LYuWFOGsOyD_=^TJRp zF#(?@#*m`^4!)hkxGam7&mGfRD@{Qx`D$&&BE=(2Nr%4+er6m$e(ByTai0O6hyi*s z({~zqU{_}vqa`;f7EVun)snaj`s0wXpHXmJpt%TrZ;RV<%Co6@&s`hxDRp@0&OXH$ z41I$Pz`a;$sg_h3XdHZVrrm_y+#%Z;iOp0m&3^$H4yM9Uhu8w;FC5+?D-_$MyL@`+ zF8$yCtW)S7A~EKZtOJUvHINMPaX3{`BEBKyIYju!Zmh}k=Dt6i>yge@*msJpAjYv) z4?FQ&vAj^BINGHH>0@~mXF9kkrcm5i%|+0$s~E-sc_LSuqyAqp3|33J^GA%N6xeC5 z4UIMZt_Gh?&B26R1wPhg^iFvqI}rP=DUav2@O?us2P46GNqH)n%26E@|Ad%_?)_MA zM@2%Oj1g9&`fG>Vm4U4_y7PnP82Q(coCW2O_etFE?c;Sr<4*TYMz}JqKa4mm%Q513 zovvEyeIZkwQ+tYIQ)~^oPq-(8PL#C`k||p^E5}ug1L;F&KEOwNd=!_;R0g^SfUm6K z^r8C0qucAVrh6vi`F(BhO+^Xwi+ID1c2xyEPiKms?kGWjoX1ajMArx6d0~uqw#vQ9 zcZwv>?MSe|^xYI~(EaGsTl>`vV(eNpk==u4U>+;b;1I{3E%m zGAjq!C(oDO>rI}2nEr7V8Q}Zr$NVyj!)iO)H^mZj6I6M^IPWlQW=ddB+ND*n0*eJJMdCSL!ve>983YCGDe@_gy6(JVZT;s`o7x5dX^Kr*p{VgX#CkX0h_^t|MQ%9eGMT-*7-&p@%V5T~sRgG4>Dt z<@=&FlINSP_fvh1dV3(;vAX|~3aj3?=wi=Pu)7`}e`{o6R`+g9cG{D0xe#m|F2+g= zzORQdRUKj)k6=IO1O9|P1f5wZFD={aFwNWAav#I2EnBSKwNg$P_RwefF`#|o$(@Z^ zvm?r%=)nIO@O-d_-oXBSa;9r98y6Y_tYZneYK%uqxX4t#0<#aJlJc**8~nOHA2Fug zp<=J;ygr6oThh9R`J;QUPI(05g7 z|0-YFlkV!O$|#5C%Ar*X?b9gd1oo+i{}%0kt=f%N|MWW)3Ox#2oCo|L>B&Ix4jPM{ zYbsLO)Mdn1DEE?ZQ?80$U#Pnao0lV!@5?-nk-V4T|ANAPak-HU3=8z8#&$L|1=V=f z_V(p!Olwv3V#He#az^#=ldoGR2MfM260AnFBm?Z|1ITfn;mu-dd@12 zkohLp5-BU=P>{A6buivkxZs(ZK%zqm$?g literal 0 HcmV?d00001 diff --git a/stac_server/main.py b/stac_server/main.py new file mode 100644 index 0000000..736a521 --- /dev/null +++ b/stac_server/main.py @@ -0,0 +1,253 @@ +import json +import yaml +from pathlib import Path +import os +from datetime import datetime +from collections import defaultdict +from typing import Any, Dict +import yaml +import os + +from fastapi import FastAPI, Request +from fastapi.middleware.cors import CORSMiddleware +from fastapi.staticfiles import StaticFiles +from fastapi.responses 
# stac_server/main.py
#
# FastAPI application that serves a STAC-like browsing API on top of a
# catalogue tree. At import time the module loads a JSON document from the
# Redis key ``compressed_catalog``, rebuilds a tree from it, and reads a MARS
# language description from ``$CONFIG_DIR/language.yaml``. Request handlers
# then match query parameters against that in-memory tree.
#
# The imports below are repeated in full so that this module text is
# self-contained.
import json
import os
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, Iterator, List

import redis
import yaml
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse, FileResponse
from fastapi.templating import Jinja2Templates

from TreeTraverser.fdb_schema import FDBSchemaFile
from TreeTraverser.CompressedTree import CompressedTree

app = FastAPI()
# NOTE(review): a wildcard origin together with allow_credentials=True is
# discouraged by the FastAPI CORS documentation. The settings are left exactly
# as they were because existing clients may rely on them -- confirm whether
# credentials are really needed and, if so, list the allowed origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Resolve the icon next to this file so the route does not depend on the
# process working directory.
_FAVICON_PATH = Path(__file__).resolve().parent / "favicon.ico"


@app.get('/favicon.ico', include_in_schema=False)
async def favicon():
    """Serve the favicon that ships alongside this module."""
    return FileResponse(_FAVICON_PATH)


print("Getting cache from redis")
r = redis.Redis(host=os.environ.get("REDIS_HOST", "localhost"), port=6379, db=0)
json_data = r.get('compressed_catalog')
if not json_data:
    raise ValueError("No compressed catalog found in Redis")
print("Found compressed catalog in Redis")

print("Loading tree to json")
compressed_tree_json = json.loads(json_data)
c_tree = CompressedTree.from_json(compressed_tree_json)

print("Partialy decompressing tree, shoud be able to skip this step in future.")
# The handlers below treat ``tree`` as nested dicts whose keys look like
# "name=v1,v2" (see match_against_cache).
tree = c_tree.reconstruct_compressed_ecmwf_style()

print("Ready to serve requests!")

base = os.environ.get("CONFIG_DIR", ".")
config = {
    "fdb_schema": f"{base}/schema",
    "mars_language": f"{base}/language.yaml",
}

with open(config["mars_language"], "r", encoding="utf-8") as f:
    mars_language = yaml.safe_load(f)["_field"]

###### Load FDB Schema
schema = FDBSchemaFile(config["fdb_schema"])


def request_to_dict(request: Request) -> Dict[str, Any]:
    """Turn the query string of *request* into a plain dict.

    A value containing a comma becomes a list of its comma-separated parts;
    every other value stays a string.
    """
    return {
        key: value.split(",") if "," in value else value
        for key, value in dict(request.query_params).items()
    }


def match_against_cache(request, tree):
    """Return the part of *tree* that is compatible with *request*.

    Args:
        request: mapping of key name to either a single string value or a
            list of string values.
        tree: nested dict whose keys have the form ``"name=v1,v2,..."``.

    Returns:
        ``{"_END_": {}}`` when *tree* is empty. Otherwise a dict holding, for
        every key of *tree* whose name is in *request* and whose values
        overlap the requested ones, an entry ``"name=<matched values>"``
        mapped to the recursive match of its subtree. If nothing at this
        level matches, every key of *tree* is returned with an empty subtree.

    Raises:
        ValueError: if a key of *tree* does not contain exactly one ``=``.
    """
    if not tree:
        return {"_END_": {}}

    matches = {}
    for k, subtree in tree.items():
        parts = k.split("=")
        if len(parts) != 2:
            raise ValueError(f"Key {k} is not in the correct format")
        key, raw_values = parts
        if key not in request:
            continue

        values = set(raw_values.split(","))
        requested = request[key]
        if isinstance(requested, list):
            # Keep the requested values that this node offers, in request order.
            matching_values = ",".join(v for v in requested if v in values)
            if matching_values:
                matches[f"{key}={matching_values}"] = match_against_cache(request, subtree)
        elif requested in values:
            matches[f"{key}={requested}"] = match_against_cache(request, subtree)

    if not matches:
        # Nothing requested applies here: expose the available keys as leaves.
        return {k: {} for k in tree}
    return matches


def max_tree_depth(tree):
    """Return the number of levels in the nested dict *tree* (0 if empty)."""
    if not tree:
        return 0
    return 1 + max(max_tree_depth(child) for child in tree.values())


def prune_short_branches(tree, depth=None):
    """Keep only the branches of *tree* that reach the maximum depth.

    *depth* defaults to the maximum depth of *tree*. A child is kept only if
    its own maximum depth is exactly ``depth - 1``.
    """
    if depth is None:
        depth = max_tree_depth(tree)
    return {
        k: prune_short_branches(child, depth - 1)
        for k, child in tree.items()
        if max_tree_depth(child) == depth - 1
    }


def get_paths_to_leaves(tree) -> Iterator[List[str]]:
    """Yield, for every leaf of *tree*, the list of keys from the root to it."""
    for k, child in tree.items():
        if not child:
            yield [k]
        else:
            for tail in get_paths_to_leaves(child):
                yield [k] + tail


def get_leaves(tree) -> Iterator[str]:
    """Yield the key of every leaf (entry with an empty subtree) of *tree*."""
    for k, child in tree.items():
        if not child:
            yield k
        else:
            yield from get_leaves(child)


@app.get("/match")
async def get_match(request: Request):
    """Return the pruned match tree for the request's query parameters."""
    request_dict = request_to_dict(request)
    match_tree = match_against_cache(request_dict, tree)

    # Prune the tree to only include branches that are as deep as the deepest
    # match. This means if you don't choose a certain branch at some point
    # the UI won't keep nagging you to choose a value for that branch.
    return prune_short_branches(match_tree)


def _matching_frontier(request_dict):
    """Group the leaves of the pruned match tree by key name.

    Returns a list of dicts with ``key`` (leaf key name), ``values`` (all
    values seen for that key, sorted in descending order) and ``paths`` (the
    distinct key paths leading to such a leaf). Paths are sorted so that the
    result, and therefore the link built from ``paths[0]``, is deterministic.
    """
    match_tree = prune_short_branches(match_against_cache(request_dict, tree))

    # Deduplicate leaves based on the key.
    by_key = defaultdict(lambda: {"paths": set(), "values": set()})
    for path in get_paths_to_leaves(match_tree):
        *prefix, leaf = path
        if leaf == "_END_":
            continue
        key, values = leaf.split("=")
        by_key[key]["values"].update(values.split(","))
        by_key[key]["paths"].add(tuple(prefix))

    return [
        {
            "paths": sorted(entry["paths"]),
            "key": key,
            "values": sorted(entry["values"], reverse=True),
        }
        for key, entry in by_key.items()
    ]


@app.get("/paths")
async def api_paths(request: Request):
    """Return the keys, values and paths that can extend the current request."""
    return _matching_frontier(request_to_dict(request))


def _make_link(key_name, paths, values):
    """Build the STAC link dict for one MARS key.

    Args:
        key_name: name of the key the link lets the client choose.
        paths: non-empty list of key paths that lead to this key; the first
            one is used to build the href template.
        values: values available for the key.
    """
    path = paths[0]
    href_template = f"/stac?{'&'.join(path)}{'&' if path else ''}{key_name}={{}}"
    language_entry = mars_language.get(key_name, {})

    # Map every alias of a value to its description. This assumes each list
    # entry is [alias, ..., description], as the original indexing implies.
    # Entries that are not lists carry no description and are skipped; the
    # previous code left the result unassigned (UnboundLocalError) if any
    # entry was not a list.
    description_by_alias = {
        alias: entry[-1]
        for entry in language_entry.get("values", [])
        if isinstance(entry, list) and len(entry) > 1
        for alias in entry[:-1]
    }
    descriptions = [description_by_alias.get(v, "") for v in values]
    if not any(descriptions):
        descriptions = None

    return {
        "title": key_name,
        "generalized_datacube:href_template": href_template,
        "rel": "child",
        "type": "application/json",
        "generalized_datacube:dimension": {
            "type": language_entry.get("type", ""),
            "description": language_entry.get("description", ""),
            "values": values,
            "value_descriptions": descriptions,
            "optional": False,
            "multiple": True,
            "paths": paths,
        },
    }


def _requested_value_descriptions(key, values):
    """Map each requested value of *key* to its description, where known.

    *values* may be a single string or a list of strings. Only the first
    alias of each language entry is compared, as before, but a single string
    is now compared for equality instead of by substring.
    """
    wanted = {values} if isinstance(values, str) else set(values)
    return {
        entry[0]: entry[-1]
        for entry in mars_language.get(key, {}).get("values", [])
        if isinstance(entry, list) and len(entry) > 1 and entry[0] in wanted
    }


@app.get("/stac")
async def get_STAC(request: Request):
    """Return a STAC collection whose links are the possible next choices."""
    request_dict = request_to_dict(request)
    paths = _matching_frontier(request_dict)

    descriptions = {
        key: {
            "key": key,
            "values": values,
            "description": mars_language.get(key, {}).get("description", ""),
            "value_descriptions": _requested_value_descriptions(key, values),
        }
        for key, values in request_dict.items()
    }

    # Format the response as a STAC collection
    stac_collection = {
        "type": "Collection",
        "stac_version": "1.0.0",
        "id": "partial-matches",
        "description": "STAC collection representing potential children of this request",
        "links": [
            _make_link(p["key"], p["paths"], p["values"])
            for p in paths
        ],
        "debug": {
            "request": request_dict,
            "descriptions": descriptions,
            "paths": paths,
        },
    }

    return stac_collection


# ---------------------------------------------------------------------------
# Other files added by the same patch, kept verbatim for reference.
#
# stac_server/requirements.txt (new file, index 0000000..5726302):
#     fastapi[standard]
#     pe
#     redis
# NOTE(review): `pe` does not correspond to anything this module imports --
# confirm whether it is intended.
#
# stac_server/run.sh (new file, mode 100755, index 0000000..74f2741):
#     parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
#     cd "$parent_path"
#     REDIS_HOST=localhost CONFIG_DIR=../config/destinE fastapi dev ./main.py --port 8124 --reload
# ---------------------------------------------------------------------------