70 lines
1.8 KiB
Python
70 lines
1.8 KiB
Python
from tree_traverser import backend, CompressedTree
|
|
import datetime
|
|
import psutil
|
|
from tqdm import tqdm
|
|
from pathlib import Path
|
|
import json
|
|
from more_itertools import chunked
|
|
|
|
# psutil handle created without a PID argument (per the psutil docs this refers
# to the current process); the main loop reads its RSS to print memory usage.
process = psutil.Process()
|
|
|
|
|
|
def massage_request(r: dict) -> dict:
    """Return a copy of request *r* in which every value is a list.

    Values that are already ``list`` instances are kept as they are; any
    other value (including tuples and strings) is wrapped in a one-element
    list. The input mapping is not modified.

    Args:
        r: Request mapping of key -> scalar or list of values.

    Returns:
        A new dict with the same keys and list-valued entries.
    """
    return {k: v if isinstance(v, list) else [v] for k, v in r.items()}
|
|
|
|
|
|
def pending_dates(start, end, visited):
    """Return the ``%Y%m%d`` strings for each day in ``[start, end)`` not yet visited.

    The membership test is done on the *formatted string*, because the
    visited set is persisted as JSON and therefore holds strings, not
    ``datetime`` objects.

    Args:
        start: First day (inclusive), a ``datetime.datetime``.
        end: Last day (exclusive), a ``datetime.datetime``.
        visited: Container of already-processed ``%Y%m%d`` strings.

    Returns:
        Chronologically ordered list of date strings still to process.
    """
    n_days = (end - start).days
    stamps = (
        (start + datetime.timedelta(days=offset)).strftime("%Y%m%d")
        for offset in range(n_days)
    )
    return [stamp for stamp in stamps if stamp not in visited]


if __name__ == "__main__":
    # Connection settings passed verbatim to the FDB backend as YAML text.
    config = """
---
type: remote
host: databridge-prod-catalogue1-ope.ewctest.link
port: 10000
engine: remote
store: remote
"""

    # Base request; a "date" list is merged in for every chunk below.
    request = {
        "class": "d1",
        "dataset": "climate-dt",
        # "date": "19920420",
    }

    # Resume from a previously saved tree if one exists, else start empty.
    data_path = Path("data/compressed_tree_climate_dt.json")
    if not data_path.exists():
        compressed_tree = CompressedTree({})
    else:
        compressed_tree = CompressedTree.load(data_path)

    fdb = backend.PyFDB(fdb_config=config)

    # Dates already traversed in earlier runs, stored as "%Y%m%d" strings.
    visited_path = Path("data/visited_dates.json")
    if not visited_path.exists():
        visited_dates = set()
    else:
        with open(visited_path, "r") as f:
            visited_dates = set(json.load(f))

    # Make sure the output directory exists before the first checkpoint write.
    data_path.parent.mkdir(parents=True, exist_ok=True)
    visited_path.parent.mkdir(parents=True, exist_ok=True)

    today = datetime.datetime.today()
    start = datetime.datetime.strptime("19920420", "%Y%m%d")
    # Filter on the formatted string so previously visited dates are skipped.
    date_list = pending_dates(start, today, visited_dates)

    for dates in chunked(tqdm(date_list), 5):
        print(dates[0])
        print(f"Memory usage: {(process.memory_info().rss) / 1e6:.1f} MB")

        r = request | dict(date=dates)
        tree = fdb.traverse_fdb(massage_request(r))

        # Checkpoint after every chunk so an interrupted run can resume.
        compressed_tree.insert_tree(tree)
        compressed_tree.save(data_path)

        visited_dates.update(dates)

        with open(visited_path, "w") as f:
            # Sorted so the checkpoint file is stable between runs.
            json.dump(sorted(visited_dates), f)

    # print(compressed_tree.reconstruct_compressed_ecmwf_style())