Create example ingestion script
This commit is contained in:
parent
04b4ee24eb
commit
ba2c67d812
94
test_scripts/update_dts.py
Normal file
94
test_scripts/update_dts.py
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
# Example script for ingesting data from an fdb into a qube
|
||||||
|
# Notes
|
||||||
|
# Uses fdb --compact
|
||||||
|
# Splits by data in order to avoid out of memory problems with fdb --compact
|
||||||
|
# Does a bit of processing like removing "year" and "month" keys
|
||||||
|
# Might want to add datatypes and reordering of keys there too
|
||||||
|
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from time import time
|
||||||
|
|
||||||
|
import psutil
|
||||||
|
from qubed import Qube
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
process = psutil.Process()
|
||||||
|
|
||||||
|
CHUNK_SIZE = timedelta(days=60)
|
||||||
|
FILEPATH = "./full_dt_qube.json"
|
||||||
|
API = "https://qubed.lumi.apps.dte.destination-earth.eu/api/v1"
|
||||||
|
|
||||||
|
|
||||||
|
def ecmwf_date(d):
|
||||||
|
return d.strftime("%Y%m%d")
|
||||||
|
|
||||||
|
|
||||||
|
# start_date = datetime.now() - timedelta(days=10)
|
||||||
|
start_date = datetime(1990, 1, 1)
|
||||||
|
# end_date = datetime.now()
|
||||||
|
end_date = datetime(2026, 1, 1)
|
||||||
|
|
||||||
|
current_span = [end_date - CHUNK_SIZE, end_date]
|
||||||
|
|
||||||
|
qube = Qube.load(FILEPATH)
|
||||||
|
# qube = Qube.empty()
|
||||||
|
|
||||||
|
while current_span[0] > start_date:
|
||||||
|
for config in ["config-climate-dt.yaml", "config-extremes-dt.yaml"]:
|
||||||
|
t0 = time()
|
||||||
|
start, end = map(ecmwf_date, current_span)
|
||||||
|
print(f"Doing {config} {current_span[0].date()} - {current_span[1].date()}")
|
||||||
|
print(f"Current memory usage: {process.memory_info().rss / 1e9:.2g}GB")
|
||||||
|
print(f"{qube.n_nodes = }, {qube.n_leaves = },")
|
||||||
|
|
||||||
|
subqube = Qube.empty()
|
||||||
|
command = [
|
||||||
|
f"fdb list --compact --config {config} --minimum-keys=date class=d1,date={start}/{end}"
|
||||||
|
]
|
||||||
|
try:
|
||||||
|
p = subprocess.run(
|
||||||
|
command,
|
||||||
|
text=True,
|
||||||
|
shell=True,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
check=True,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Failed for {current_span} {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
print("Got compact list")
|
||||||
|
for i, line in tqdm(enumerate(list(p.stdout.split("\n")))):
|
||||||
|
if not line.startswith("retrieve,class="):
|
||||||
|
continue
|
||||||
|
|
||||||
|
def split(t):
|
||||||
|
return t[0], t[1].split("/")
|
||||||
|
|
||||||
|
# Could do datatypes here
|
||||||
|
request = dict(split(v.split("=")) for v in line.strip().split(",")[1:])
|
||||||
|
request.pop("year", None)
|
||||||
|
request.pop("month", None)
|
||||||
|
# Could do things like date = year + month + day
|
||||||
|
q = Qube.from_datacube(request)
|
||||||
|
subqube = subqube | q
|
||||||
|
print("added to qube")
|
||||||
|
|
||||||
|
qube = qube | subqube
|
||||||
|
subqube.print(depth=2)
|
||||||
|
print(f"{subqube.n_nodes = }, {subqube.n_leaves = },")
|
||||||
|
|
||||||
|
# requests.post(
|
||||||
|
# API + "/union/climate-dt/",
|
||||||
|
# headers = {"Authorization" : "Bearer ?????"},
|
||||||
|
# json = subqube.to_json())
|
||||||
|
|
||||||
|
current_span = [current_span[0] - CHUNK_SIZE, current_span[0]]
|
||||||
|
print(
|
||||||
|
f"Did that taking {(time() - t0) / CHUNK_SIZE.days:2g} seconds per day ingested, total {(time() - t0):2g}s"
|
||||||
|
)
|
||||||
|
with open(FILEPATH, "w") as f:
|
||||||
|
json.dump(qube.to_json(), f)
|
Loading…
x
Reference in New Issue
Block a user