update with compression

This commit is contained in:
Tom Hodson 2024-11-07 10:57:23 +00:00
parent 5765da7ecc
commit e7f49e5898
14 changed files with 490 additions and 67 deletions

View File

@ -9,6 +9,8 @@ from fastapi.staticfiles import StaticFiles
from fdb_schema import FDBSchemaFile
from fastapi.responses import RedirectResponse
from fastapi.templating import Jinja2Templates
import json
import yaml
import os
os.environ["FDB5_CONFIG_FILE"] = "/home/eouser/destine_remoteFDB_config.yaml"
@ -26,8 +28,8 @@ app.add_middleware(
allow_headers=["*"],
)
app.mount("/app", StaticFiles(directory="../webapp"), name="static")
templates = Jinja2Templates(directory="../webapp")
app.mount("/app", StaticFiles(directory="./webapp"), name="static")
templates = Jinja2Templates(directory="./webapp")
config = {
"message": "",
@ -38,14 +40,15 @@ if os.path.exists("../config.yaml"):
with open("../config.yaml", "r") as f:
config = config | yaml.safe_load(f)
print("Loading compressed_cache.json")
with open("../cache/compressed_cache.json", "r") as f:
list_cache = json.load(f)
@app.get("/")
async def redirect_to_app(request: Request):
return templates.TemplateResponse("index.html", {"request": request, "config": config})
import yaml
with open(config["mars_language"], "r") as f:
mars_language = yaml.safe_load(f)["_field"]

View File

@ -3,12 +3,14 @@
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>STAC Viewer</title>
<title>ECMWF DestinE STAC Viewer</title>
<link rel="stylesheet" href="/app/styles.css" />
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/json.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/python.min.js"></script>
<link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>📚</text></svg>">
</head>
<body>
<div id="viewer">

9
cache/README.md vendored Normal file
View File

@ -0,0 +1,9 @@
This code builds the tree cache from fdb.list
I tried a full run of the entire database using `list_entire_fdb.py`; it died after 14 hours and 89 million unique objects, perhaps because it ran out of memory.
The raw `cache.json` can be compressed using "tree_to_compressed.py" which folds identical subtrees and replaces the keys with "key=val1,val2,val3" strings.
For a 38MB cache.json, the compressed version is 40KB.
For 122MB it's 44KB.

View File

@ -0,0 +1 @@
{"class=d1": {"dataset=climate-dt": {"activity=cmip6": {"experiment=hist": {"generation=1": {"model=icon": {"realization=1": {"expver=0001": {"stream=clte": {"date=19910410": {"resolution=high": {"type=fc": {"levtype=sfc": {"time=0000": {"param=130": {}}}}}}}}}}}}}}}}

View File

@ -42,7 +42,7 @@ def compress_tree(tree, max_level = 5):
top_level = {k : cache_tree(cache, v) for k, v in tree.items() if k != "_count"}
return expand_tree_but_collapsed(cache, top_level, max_level = max_level)
def print_schema_tree(tree):
def pretty_schema_tree(tree):
name_cache = {}
names = set()
@ -77,11 +77,5 @@ def print_schema_tree(tree):
return out
schema_tree = tree_as_schema(tree)
for k, v in sorted(name_cache.items()):
# print(f"{k} : {','.join(sorted(v.split(","), key = int))}")
print(f"{v} : {k}")
print()
print(schema_tree)
groups = "\n".join(f"{v} : {k}" for k, v in sorted(name_cache.items()))
return groups + "\n" + schema_tree

1
cache/compressed_cache.json vendored Normal file

File diff suppressed because one or more lines are too long

77
cache/list_entire_fdb.py vendored Normal file
View File

@ -0,0 +1,77 @@
#! catalogs/.venv/bin/python
"""Build the raw tree cache (``cache.json``) by listing every object in the FDB.

Every item returned by ``fdb.list`` is matched against the schema and inserted
into a nested dict whose keys are ``"key=value"`` strings, one level per matched
schema key. Progress is printed every 10,000 items and the tree is checkpointed
to ``cache.json`` every 1,000,000 items so that a crashed run still leaves a
usable (partial) cache behind.
"""
import time
from collections import defaultdict
import os
from fdb_schema import FDBSchemaFile

# Kept ahead of the pyfdb import, exactly as in the original ordering.
# NOTE(review): presumably pyfdb reads this variable when it is imported or
# when FDB() is constructed -- confirm before moving this line.
os.environ["FDB5_CONFIG_FILE"] = "/home/eouser/prod_remoteFDB.yaml"
import json

schema = FDBSchemaFile("/home/eouser/catalogs/backend/destinE_schema")

import pyfdb
from collections import Counter
import os, sys
from pathlib import Path
from datetime import datetime

# compress_tree.print_schema_tree was renamed to pretty_schema_tree; importing
# the old name raises ImportError before the scan even starts.
from compress_tree import pretty_schema_tree, compress_tree

PROGRESS_EVERY = 10_000       # items between progress reports
CHECKPOINT_EVERY = 1_000_000  # items between dumps of the partial tree

cache = Path("cache.json")


def dump_tree(tree, path=cache):
    """Write *tree* as JSON to *path* without ever leaving a truncated file.

    The JSON is written to a sibling temporary file first and then moved over
    *path*, so an interruption in the middle of a (potentially very large)
    dump cannot corrupt the previous checkpoint.
    """
    tmp = path.with_name(path.name + ".tmp")
    with open(tmp, "w") as f:
        json.dump(tree, f)
    tmp.replace(path)


request = {
    "class": "d1",
}

t0 = time.time()
fdb = pyfdb.FDB()
spans = defaultdict(set)  # currently unused; kept for interactive experiments

tree = {}
total = 0

for item in fdb.list(request, keys=True):
    # Use a separate name so the query dict above is not clobbered mid-loop.
    keys = item["keys"]
    _, m = schema.match(keys)

    # Walk/extend the tree one "key=value" level per matched schema key.
    loc = tree
    for kv in m:
        k = f'{kv.key}={kv.str_value()}'
        # loc["_count"] = loc.get("_count", 0) + 1
        loc = loc.setdefault(k, {})

    total += 1

    if total % PROGRESS_EVERY == 0:
        os.system("clear")
        print(f"Total: {total/1e3:.0f} thousand")
        print(f"Runtime: {(time.time() - t0):.0f} s")
        print()
        print("Last request:")
        for k, v in keys.items():
            print(f"{k} : {v}")

    if total % CHECKPOINT_EVERY == 0:
        print("Dumping cache to cache.json")
        dump_tree(tree)

os.system("clear")
print(f"Total: {total}")
print(f"Runtime: {(time.time() - t0) / 60:.0f} mins")

# NOTE(review): because checkpoints are written to cache.json during the run,
# for scans longer than CHECKPOINT_EVERY items the file moved here is the last
# checkpoint of this run rather than the cache from a previous run.
if cache.exists():
    backup = Path(f"backups/cache.json.backup.{datetime.now().strftime('%d.%m.%Y')}")
    # Without this, a missing backups/ directory makes rename() raise and the
    # final dump below never happens.
    backup.parent.mkdir(parents=True, exist_ok=True)
    print(f"Moving cache to {backup}")
    cache.rename(backup)

print("Dumping cache to cache.json")
dump_tree(tree)

print("Done")
sys.exit()

305
cache/pretty_compressed_cache.txt vendored Normal file
View File

@ -0,0 +1,305 @@
date_0 : date=19910410,19920514,19920519,19920527,19920617,19920625,19920626,19920629,19920720,19920724
date_1 : date=19910614,19920430,19920517
date_2 : date=19910703,19920525,19920601,19920605
date_3 : date=19910723,19920619,19920702,19920713,19920715
date_4 : date=19920423,19920529,19920618,19920704
date_5 : date=19920424,19920511,19920603,19920620,19920726
date_6 : date=19920425,19920428,19920507,19920607,19920611,19920701,19920712,19920721,19920725
date_7 : date=19920426,19920502,19920503,19920510,19920628,19920716,19920718
date_8 : date=19920427,19920508,19920516,19920622,19920708,19920801
date_9 : date=19920429,19920614,19920703,19920710,19920727
date_10 : date=19920501,19920512,19920522,19920524
date_11 : date=19920504,19920623,19920709,19920711
date_12 : date=19920505,19920515,19920531,19920610,19920612,19920615
date_13 : date=19920506,19920717,19920722
date_14 : date=19920509,19920608,19920627
date_15 : date=19920513,19920520,19920613
date_16 : date=19920518,19920602,19920719
date_17 : date=20200102,20200103,20200104,20200105,20200106,20200107,20200108,20200109,20200110,20200111,20200112,20200113,20200114,20200115,20200116,20200117,20200118,20200119,20200120,20200121,20200122,20200123,20200124,20200125,20200126,20200127,20200128,20200129,20200130,20200131,20200201,20200202,20200203,20200204,20200205,20200206,20200207,20200208,20200209,20200210,20200211,20200212,20200213,20200214,20200215,20200216,20200217,20200218,20200219,20200220,20200221,20200222,20200223,20200224,20200225,20200226,20200227,20200228,20200229,20200301,20200302,20200303,20200304,20200305,20200306,20200307,20200308,20200309,20200310,20200311,20200312,20200313,20200314,20200315,20200316,20200317,20200318,20200319,20200320,20200321,20200322,20200323,20200324,20200325,20200326,20200327,20200328,20200329,20200330,20200331,20200401,20200402,20200403,20200404,20200405,20200406,20200407,20200408,20200409,20200410,20200411,20200412,20200413,20200414,20200415,20200416,20200417,20200418,20200419,20200420,20200421,20200422,20200423,20200424,20200425,20200426,20200427,20200428,20200429,20200430,20200501,20200502,20200503,20200504,20200505,20200506,20200507,20200508,20200509,20200510,20200511,20200512,20200513,20200514,20200515,20200516,20200517,20200518,20200519,20200520,20200521,20200522,20200523,20200524,20200525,20200526,20200527,20200528,20200529,20200530,20200531,20200601,20200602,20200603,20200604,20200605,20200606,20200607,20200608,20200609,20200610,20200611,20200612,20200613,20200614,20200615,20200616,20200617,20200618,20200619,20200620,20200621,20200622,20200623,20200624,20200625,20200626,20200627,20200628,20200629,20200630,20200701,20200702,20200703,20200704,20200705,20200706,20200707,20200708,20200709,20200710,20200711,20200712,20200713,20200714,20200715,20200716,20200717,20200718,20200719,20200720,20200721,20200722,20200723,20200724,20200725,20200726,20200727,20200728,20200729,20200730,20200731,20200801,20200802,20200803,20200804,20200805,20200806,20200807,20200808,20200
809,20200810,20200811,20200812,20200813,20200814,20200815,20200816,20200817,20200818,20200819,20200820,20200821,20200822,20200823,20200824,20200825,20200826,20200827,20200828,20200829,20200830,20200831,20200901,20200902,20200903,20200904,20200905,20200906,20200907,20200908,20200909,20200910,20200911,20200912,20200913,20200914,20200915,20200916,20200917,20200918,20200919,20200920,20200921,20200922,20200923,20200924,20200925,20200926,20200927,20200928,20200929,20200930,20201001,20201002,20201003,20201004,20201005,20201006,20201007,20201008,20201009,20201010,20201011,20201012,20201013,20201014,20201015,20201016,20201017,20201018,20201019,20201020,20201021,20201022,20201023,20201024,20201025,20201026,20201027,20201028,20201029,20201030,20201031,20201101,20201102,20201103,20201104,20201105,20201106,20201107,20201108,20201109,20201110,20201111,20201112,20201113,20201114,20201115,20201116,20201117,20201118,20201119,20201120,20201121,20201122,20201123,20201124,20201125,20201126,20201127,20201128,20201129,20201130,20201201,20201202,20201203,20201204,20201205,20201206,20201207,20201208,20201209,20201210,20201211,20201212,20201213,20201214,20201215,20201216,20201217,20201218,20201219,20201220,20201221,20201222,20201223,20201224,20201225,20201226,20201227,20201228,20201229,20201230,20201231,20210101,20210102,20210103,20210104,20210105,20210106,20210107,20210108,20210109,20210110,20210111,20210112,20210113,20210114,20210115,20210116,20210117,20210118,20210119,20210120,20210121,20210122,20210123,20210124,20210125,20210126,20210127,20210128,20210129,20210130,20210131,20210201,20210202,20210203,20210204,20210205,20210206,20210207,20210208,20210209,20210210,20210211,20210212,20210213,20210214,20210215,20210216,20210217,20210218,20210219,20210220,20210221,20210222,20210223,20210224,20210225,20210226,20210227,20210228,20210301,20210302,20210303,20210304,20210305,20210306,20210307,20210308,20210309,20210310,20210311,20210312,20210313,20210314,20210315,20210316,20210317,20210318,2021031
9,20210320,20210321,20210322,20210323,20210324,20210325,20210326,20210327,20210328,20210329,20210330,20210331,20210401,20210402,20210403,20210404,20210405,20210406,20210407,20210408,20210409,20210410,20210411,20210412,20210413,20210414,20210415,20210416,20210417,20210418,20210419,20210420,20210421,20210422,20210423,20210424,20210425,20210426,20210427,20210428,20210429,20210430,20210501,20210502,20210503,20210504,20210505,20210506,20210507,20210508,20210509,20210510,20210511,20210512,20210513,20210514,20210515,20210516,20210517,20210518,20210519,20210520,20210521,20210522,20210523,20210524,20210525,20210526,20210527,20210528,20210529,20210530,20210531,20210601,20210602,20210603,20210604,20210605,20210606,20210607,20210608,20210609,20210610,20210611,20210612,20210613,20210614,20210615,20210616,20210617,20210618,20210619
levelist_1 : levelist=10,11,12,13,14,15,16,17,18,19,1,20,21,22,23,24,25,26,27,28,29,2,30,31,32,33,34,35,36,37,38,39,3,40,41,42,43,44,45,46,47,48,49,4,50,51,52,53,54,55,56,57,58,59,5,60,61,62,63,64,65,66,67,68,69,6,70,71,72,7,8,9
levelist_2 : levelist=10,11,12,13,14,15,16,17,18,19,1,20,21,22,23,24,25,26,27,28,29,2,30,31,32,33,34,35,36,37,38,39,3,40,41,42,43,44,45,46,47,48,49,4,50,51,52,53,54,55,56,57,58,59,5,60,61,62,63,64,65,66,67,68,69,6,70,71,72,73,7,8,9
levelist_6 : levelist=10,11,12,13,14,15,16,17,18,19,1,20,21,22,23,24,25,26,27,28,29,2,30,31,32,33,34,35,36,37,38,39,3,40,41,42,43,44,45,46,47,48,49,4,50,51,52,53,54,55,56,57,58,59,5,60,61,62,63,64,65,66,67,68,69,6,70,71,72,73,74,75,7,8,9
levelist_3 : levelist=100,10,1,20,30,50,5,70
levelist_4 : levelist=100,10,150,1,20,30,50,5,70
levelist_5 : levelist=100,10,150,1,200,20,250,300,30,50,5,70
levelist_0 : levelist=1000,100,10,150,1,200,20,250,300,30,400,500,50,5,600,700,70,850,925
param_2 : param=0,263500,263501,263505,263506
param_1 : param=129,130,131,132,133,135,157,246
param_9 : param=129,130,131,132,133,135,157,246,60
param_11 : param=129,172,134,137,141,144,146,147,148,151,159,164,165,166,167,168,169,175,176,177,178,179,180,181,182,186,187,188,212,228,235,260048,78,79,8,9
param_0 : param=130,134,137,146,147,151,165,166,167,168,169,175,176,177,178,179,228164,235,260048,260654,260655,78,79
param_12 : param=134,137,141,144,146,147,148,151,159,164,165,166,167,168,169,175,176,177,178,179,180,181,182,186,187
param_8 : param=134,137,141,144,146,147,148,151,159,164,165,166,167,168,169,175,176,177,178,179,180,181,182,186,187,188,212,228,235,260048,78,79,8,9
param_10 : param=263000,263001,263003,263004,263008,263009,263021,263022,263100,263101,263121,263122,263124
param_7 : param=263000,263001,263003,263004,263008,263009,263100,263101,263121,263122,263124
param_3 : param=263000,263001,263003,263004,263009,263114,263124
param_4 : param=263500,263501,263505,263506
param_6 : param=263500,263501,263505,263506,263507
param_5 : param=263500,263501,263505,263506,263507,0
time_1 : time=0000,0100,0200,0300,0400,0500,0600,0700,0800,0900,1000,1100,1200,1300,1400,1500,1600,1700,1800,1900
time_6 : time=0000,0100,0200,0300,0400,0500,0600,0700,0800,0900,1000,1100,1200,1300,1400,1500,1600,1700,1800,1900,2000,2100
time_2 : time=0000,0100,0200,0300,0400,0500,0600,0700,0800,0900,1000,1100,1200,1300,1400,1500,1600,1700,1800,1900,2000,2100,2200
time_0 : time=0000,0100,0200,0300,0400,0500,0600,0700,0800,0900,1000,1100,1200,1300,1400,1500,1600,1700,1800,1900,2000,2100,2200,2300
time_4 : time=0100,0200,0300,0400,0500,0600,0700,0800,0900,1000,1100
time_3 : time=0100,0200,0300,0400,0500,0600,0700,0800,0900,1000,1100,1200,1300,1400,1500,1600,1700,1800,1900,2000,2100,2200,2300
time_5 : time=1200,1300,1400,1500,1600,1700,1800,1900,2000,2100,2200,2300
[class=d1, dataset=climate-dt,
[ activity=cmip6, experiment=hist, generation=1,
[ model=icon, realization=1,
[ expver=0001, stream=clte,
[ date_0, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000,
[ levelist_1, param_2,
[ levelist=73, param=0,
[ levtype=o2d, time=0000, param_3,
[ date_1, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=o2d, time=0000, param_3,
[ date_2, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=o3d, time=0000, levelist_1, param_4,
[ levtype=o2d, time=0000, param_3,
[ date_3, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000,
[ levelist_1, param_2,
[ levelist=73, param=0,
[ date=19920420,19920616, resolution=high, type=fc,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000, levelist_2, param=0,
[ levtype=o2d, time=0000, param_3,
[ date=19920421, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=pl, time_0, levelist_0, param_1,
[ date=19920422, resolution=high, type=fc,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000, levelist_2, param=0,
[ date_4, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o2d, time=0000, param_3,
[ date_5, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=o3d, time=0000,
[ levelist_1, param_2,
[ levelist=73, param=0,
[ date_6, resolution=high, type=fc,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000,
[ levelist_1, param_2,
[ levelist=73, param=0,
[ levtype=o2d, time=0000, param_3,
[ date_7, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=o3d, time=0000,
[ levelist_1, param_2,
[ levelist=73, param=0,
[ levtype=o2d, time=0000, param_3,
[ date_8, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000, levelist_1, param_4,
[ levtype=o2d, time=0000, param_3,
[ date_9, resolution=high, type=fc,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000, levelist_1, param_4,
[ levtype=o2d, time=0000, param_3,
[ date_10, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=o3d, time=0000, levelist_1, param_4,
[ date_11, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000, levelist_2, param=0,
[ date_12, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000, levelist_2, param=0,
[ levtype=o2d, time=0000, param_3,
[ date_13, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000, levelist_1, param_4,
[ date_14, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=o3d, time=0000, levelist_2, param=0,
[ date_15, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=o3d, time=0000, levelist_2, param=0,
[ levtype=o2d, time=0000, param_3,
[ date_16, resolution=high, type=fc,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o2d, time=0000, param_3,
[ date=19920521,19920523, resolution=high, type=fc,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000, levelist_1, param_4,
[ date=19920526,19920706, resolution=high, type=fc,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000,
[ levelist_1, param_2,
[ levelist=73, param=0,
[ date=19920528, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=o3d, time=0000,
[ levelist_1, param_2,
[ levelist=73, param=0,
[ levtype=o2d, time=0000, param_3,
[ levtype=pl,
[ time=0000, levelist_0, param_1,
[ time=0100,
[ levelist_3, param_1,
[ levelist=150, param=129,130,131,132,133,135,
[ date=19920530,19920609, resolution=high, type=fc, levtype=o3d, time=0000, levelist_2, param=0,
[ date=19920604,19920723, resolution=high, type=fc, levtype=pl, time_0, levelist_0, param_1,
[ date=19920606, resolution=high, type=fc, levtype=o2d, time=0000, param_3,
[ date=19920621, resolution=high, type=fc,
[ levtype=o3d, time=0000,
[ levelist_1, param_2,
[ levelist=73, param=0,
[ levtype=pl,
[ time_1, levelist_0, param_1,
[ time=2000,
[ levelist_4, param_1,
[ levelist=200, param=129,130,131,132,133,135,
[ levtype=o2d, time=0000, param_3,
[ date=19920624,19920707, resolution=high, type=fc,
[ levtype=o3d, time=0000, levelist_1, param_4,
[ levtype=o2d, time=0000, param_3,
[ date=19920630, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=pl,
[ time_2, levelist_0, param_1,
[ time=2300, levelist=1, param=129,130,131,
[ levtype=o3d, time=0000, levelist_2, param=0,
[ date=19920705, resolution=high, type=fc,
[ levtype=o3d, time=0000,
[ levelist_1, param_2,
[ levelist=73, param=0,
[ levtype=o2d, time=0000, param_3,
[ date=19920714, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=pl,
[ time_2, levelist_0, param_1,
[ time=2300,
[ levelist_5, param_1,
[ levelist=400, param=129,130,131,132,
[ levtype=o3d, time=0000,
[ levelist_1, param_2,
[ levelist=73, param=0,
[ levtype=o2d, time=0000, param_3,
[ date=19920728,19920731, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000,
[ levelist_1, param_5,
[ levelist=73, param=263507,0,
[ levtype=o2d, time=0000, param_3,
[ date=19920729,19920730, resolution=high, type=fc,
[ levtype=sfc, time_0, param_0,
[ levtype=pl, time_0, levelist_0, param_1,
[ levtype=o3d, time=0000,
[ levelist_1, param_6,
[ levelist=73, param=263507,
[ levtype=o2d, time=0000, param_3,
[ expver=t002, stream=clte, date=19900101, resolution=standard, type=fc, levtype=o2d, time=0000, param_7,
[ model=ifs-nemo, realization=1,
[ expver=0001, stream=clte, date=19900101, resolution=standard,high, type=fc,
[ levtype=sol, time_0, levelist=1,2,3,4,5, param=228141,
[ levtype=sfc, time_0, param_8,
[ levtype=pl, time_0, levelist_0, param_9,
[ levtype=o3d, time=0000, levelist_6, param_6,
[ levtype=o2d, time=0000, param_10,
[ levtype=hl, time_0, levelist=100, param=228246,228247,
[ expver=a0tn, stream=clte,
[ date=20200101,
[ resolution=standard, type=fc,
[ levtype=sol, time_0, levelist=1,2,3,4,5, param=228141,
[ levtype=sfc, time_0, param_8,
[ levtype=pl, time_0, levelist_0, param_9,
[ levtype=o3d, time=0000, levelist_6, param_6,
[ levtype=o2d, time=0000, param_10,
[ levtype=hl, time_0, levelist=100, param=228246,228247,
[ resolution=high, type=fc,
[ levtype=o3d, time=0000, levelist_6, param_6,
[ levtype=o2d, time=0000, param_10,
[ date=20200102, resolution=standard, type=fc, levtype=hl, time_0, levelist=100, param=228246,228247,
[ expver=t001, stream=clte,
[ date=19900101,
[ resolution=standard, type=fc,
[ levtype=o2d, time=0000, param=263000,263001,
[ levtype=hl, time_0, levelist=100, param=228246,228247,
[ levtype=sol, time_0, levelist=1,2,3,4,5, param=228141,
[ levtype=sfc, time_0, param_8,
[ levtype=pl, time_0, levelist_0, param_9,
[ levtype=o3d, time=0000, levelist_6, param_6,
[ resolution=high, type=fc,
[ levtype=o3d, time=0000, levelist_6, param_6,
[ levtype=o2d, time=0000, param_10,
[ date=19900102,
[ resolution=standard, type=fc,
[ levtype=hl, time_0, levelist=100, param=228246,228247,
[ levtype=sol, time_0, levelist=1,2,3,4,5, param=228141,
[ levtype=sfc, time_0, param_8,
[ levtype=pl, time_0, levelist_0, param_9,
[ levtype=o3d, time=0000, levelist_6, param_6,
[ resolution=high, type=fc, levtype=o3d, time=0000, levelist_6, param_6,
[ date=19900103,19900104, resolution=standard, type=fc, levtype=hl, time_0, levelist=100, param=228246,228247,
[ expver=t002, stream=clte, date=19900101, resolution=standard,high, type=fc, levtype=o2d, time=0000, param_7,
[ expver=t003, stream=clte, date=19900101, resolution=standard, type=fc, levtype=o2d, time=0000, param_7,
[ activity=scenariomip, experiment=ssp3-7.0, generation=1,
[ model=icon, realization=1, expver=t004, stream=clte, date=20200101, resolution=high, type=fc, levtype=sfc, time_2, param=228004,167,
[ model=ifs-nemo, realization=1, expver=0001, stream=clte,
[ date=20200101,
[ resolution=standard, type=fc,
[ levtype=sfc,
[ time=0000, param_11,
[ time_3, param_8,
[ levtype=sol, time_0, levelist=1,2,3,4,5, param=228141,
[ levtype=pl, time_0, levelist_0, param_9,
[ levtype=o3d, time=0000, levelist_6, param_6,
[ levtype=o2d, time=0000, param_10,
[ levtype=hl, time_0, levelist=100, param=228246,228247,
[ resolution=high, type=fc,
[ levtype=sfc,
[ time=0000,
[ param_11,
[ levelist=150,5, param=60,
[ levelist=500,70, param=135,
[ time_4,
[ param_8,
[ levelist=150,5, param=60,
[ levelist=500,70, param=135,
[ time_5,
[ param_8,
[ levelist=10, param=157,
[ levelist=150,5, param=60,
[ levelist=500,70, param=135,
[ levtype=sol,
[ time_6, levelist=1,2,3,4,5, param=228141,
[ time=2200,2300,
[ levelist=1,2,3,4,5, param=228141,
[ param=181,
[ levtype=pl, time_0, levelist_0, param_9,
[ levtype=o3d, time=0000, levelist_6, param_6,
[ levtype=o2d, time=0000, param_10,
[ levtype=hl, time_0, levelist=100, param=228246,228247,
[ date_17, resolution=standard,high, type=fc,
[ levtype=sfc,
[ time=0000, param_11,
[ time_3, param_8,
[ levtype=sol, time_0, levelist=1,2,3,4,5, param=228141,
[ levtype=pl, time_0, levelist_0, param_9,
[ levtype=o3d, time=0000, levelist_6, param_6,
[ levtype=o2d, time=0000, param_10,
[ levtype=hl, time_0, levelist=100, param=228246,228247,
[ date=20210620,
[ resolution=standard, type=fc,
[ levtype=pl, time_0, levelist_0, param_9,
[ levtype=sfc,
[ time=0000, param_11,
[ time=0100,0200,0300,0400,0500, param_8,
[ time=0600, param_12,
[ resolution=high, type=fc, levtype=sfc, time=0000, param=129,172,

View File

@ -1,12 +1,25 @@
from compress_tree import print_schema_tree, compress_tree
from compress_tree import pretty_schema_tree, compress_tree
import json
from pathlib import Path
print("Loading tree json...")
with open("cache.json", "r") as f:
cache = Path("cache.json")
print(f"cache.json size is {cache.stat().st_size/1e6:.0f} MB")
with open(cache, "r") as f:
tree = json.load(f)
print("Compresssing...")
compressed_tree = compress_tree(tree, max_level = None)
print("Outputting")
print_schema_tree(compressed_tree)
print("Saving compressed_tree.json")
compressed_cache = Path("compressed_cache.json")
with open(compressed_cache, "w") as f:
json.dump(compressed_tree, f)
print(f"compressed_cache.json size is {compressed_cache.stat().st_size/1e3:.0f} KB")
print("Pretty printing")
pretty = pretty_schema_tree(compressed_tree)
# print(pretty)
with open("pretty_compressed_cache.txt", "w") as f:
f.write(pretty)

View File

@ -11,46 +11,26 @@ schema = FDBSchemaFile("/home/eouser/catalogs/backend/destinE_schema")
import pyfdb
from collections import Counter
import os, sys
from pathlib import Path
from datetime import datetime
from compress_tree import print_schema_tree, compress_tree
request = {
"class": "d1",
# "dataset": "climate-dt",
"dataset" : "extremes-dt",
"date" : "-14/-1",
# "time": "0000"
# "activity": 'cmip6',
# "expver": "0001",
"stream": "oper",
# "date": "-1",
# "time": "0000",
# "type": "fc",
# "levtype": "sfc",
"step": "0",
# "param": ""
}
request = {
"class": "d1",
# "dataset": "climate-dt",
# "date" : "19920422",
# "time": "0000"
# "activity": 'cmip6',
# "expver": "0001",
# "stream": "oper",
# "date": "-1",
# "time": "0000",
# "type": "fc",
# "levtype": "sfc",
# "step": "0",
# "param": "129"
}
t0 = time.time()
print("Loading cache.json")
with open("cache.json", "r") as f:
tree = json.load(f)
print(f"That tooks {(time.time() - t0)/60:.0f} mins")
fdb = pyfdb.FDB()
# spans = defaultdict(Counter)
spans = defaultdict(set)
def print_tree(t : dict, last=True, header='', name='', depth = 0, max_depth = 9):
@ -79,7 +59,6 @@ def print_tree(t : dict, last=True, header='', name='', depth = 0, max_depth = 9
for i, name in enumerate(subtrees):
print_tree(t[name], header=header + (blank if last else pipe), last= i == len(subtrees) - 1, name= name, depth = depth + 1, max_depth = max_depth)
tree = {}
total = 0
for item in fdb.list(request, keys = True):
@ -88,30 +67,16 @@ for item in fdb.list(request, keys = True):
loc = tree
for kv in m:
k = f'{kv.key}={kv.str_value()}'
loc["_count"] = loc.get("_count", 0) + 1
if k not in loc: loc[k] = {}
loc = loc[k]
# print(request)
# print(m)
# sys.exit()
total += 1
# for k, v in request.items():
# # spans[k][v] += 1
# spans[k].add(v)
if total % 1000 == 0:
compressed_tree = compress_tree(tree, max_level = None)
with open("cache.json", "w") as f:
json.dump(tree, f)
if total % 100 == 0:
os.system("clear")
print(f"Total: {total}")
print(f"Runtime: {(time.time() - t0):.0f} s")
# print_tree(tree, max_depth = 7)
print_schema_tree(compressed_tree)
os.system("clear")
@ -119,5 +84,16 @@ print(f"Total: {total}")
print(f"Runtime: {(time.time() - t0) / 60:.0f} mins")
print_tree(tree, max_depth = 4)
with open("cache.json", "w") as f:
print("Dumping tree to new_cache.json")
with open("new_cache.json", "w") as f:
json.dump(tree, f)
print(f"Moving cache to backups/cache.json.backup.{datetime.now().strftime('%d.%m.%Y')}")
Path("cache.json").rename(f"backups/cache.json.backup.{datetime.now().strftime('%d.%m.%Y')}")
print(f"Renaming new_cache.json to cache.json")
Path("new_cache.json").rename("cache.json")
print(f"Done in {(time.time() - t0)/60:.0f} min")

42
cache/view_cache.py vendored Normal file
View File

@ -0,0 +1,42 @@
"""Inspect ``cache.json``: follow a partial request down the cached tree.

The cache is a nested dict whose keys are ``"key=value"`` strings. Starting at
the root, the script repeatedly descends into whichever ``key=value`` pair from
``request`` is present at the current level, printing each step, and finally
prints the keys available at the level where it stopped, formatted so they can
be pasted back into ``request``.
"""
import json

# NOTE: the former top-level ``from compress_tree import print_schema_tree,
# compress_tree`` raised ImportError because print_schema_tree was renamed to
# pretty_schema_tree. Those helpers are only needed by the optional block at
# the end of main(), so the (corrected) import lives there.

request = {
    "class": "d1",
    "dataset": "climate-dt",
    "activity": "cmip6",
    "experiment": "hist",
    "generation": "1",
    "model": "icon",
    "realization": "1",
    "expver": "0001",
    "stream": "clte",
    "date": "19910410",
}


def descend(tree, request):
    """Follow *request* as deep into *tree* as possible.

    At each level the first ``key=value`` pair of *request* (in dict order)
    that exists in the current node is taken; the walk stops when none match.

    Returns:
        ``(path, node)`` where *path* is the list of ``"key=value"`` strings
        that were followed and *node* is the subtree reached.
    """
    path = []
    node = tree
    while True:
        for key, value in request.items():
            step = f"{key}={value}"
            if step in node:
                path.append(step)
                node = node[step]
                break
        else:
            # No request entry matches at this level: we are as deep as we can go.
            return path, node


def child_items(node):
    """Return ``(key, value)`` pairs for the ``"key=value"`` entries of *node*.

    Splits on the first ``=`` only, so values that themselves contain ``=``
    survive intact, and skips entries without ``=`` (e.g. the ``_count``
    bookkeeping key found in older caches) instead of raising ValueError.
    """
    pairs = []
    for entry in node:
        key, sep, value = entry.partition("=")
        if sep:
            pairs.append((key, value))
    return pairs


def main():
    """Load the cache, walk ``request`` into it and print what comes next."""
    with open("./cache.json", "r") as f:
        list_cache = json.load(f)

    path, loc = descend(list_cache, request)
    for step in path:
        print(step)

    for key, value in child_items(loc):
        print(f'"{key}" : "{value}",')

    # Optional: pretty-print the compressed subtree below the reached node.
    # from compress_tree import pretty_schema_tree, compress_tree
    # compressed_tree = compress_tree(loc, max_level = 3)
    # print(pretty_schema_tree(compressed_tree))


if __name__ == "__main__":
    main()