qubed/cache/raw_to_tree.py
2024-10-28 13:38:50 +00:00

41 lines
1.2 KiB
Python

import time
from collections import defaultdict
import os
from fdb_schema import FDBSchemaFile
import json
from compress_tree import print_schema_tree, compress_tree
schema = FDBSchemaFile("/home/eouser/catalogs/backend/destinE_schema")
tree = {}
i = 0
t0 = time.time()
with open("raw_list", "r") as f:
for line in f.readlines():
i += 1
# if i > 100: break
if not line.startswith("{"): continue
line = line.strip().replace("{", "").replace("}", ",")
# d = dict((k.split("=") for k in line.split(",") if k))
# _, m = schema.match(d)
loc = tree
for k in line.split(","):
if k:
if k not in loc: loc[k] = {}
loc = loc[k]
if i % 10_000 == 0:
# compressed_tree = compress_tree(tree, max_level = None)
# with open("cache.json", "w") as f:
# json.dump(tree, f)
os.system("clear")
print(f"Total: {i}")
print(f"Runtime: {(time.time() - t0):.0f} s")
# print_tree(tree, max_depth = 7)
# print_schema_tree(compressed_tree)
with open("cache.json", "w") as f:
json.dump(tree, f)
print(tree)