Tests passing checkpoint

A bit more on the rust backend
Fix update script a bit
2025-06-03 14:57:27 +02:00 · 2025-05-29 17:09:17 +02:00 · 2025-05-23 16:45:37 +00:00 · 2025-05-23 10:55:32 +01:00 · 2025-05-22 17:26:58 +01:00 · 2025-05-22 14:42:49 +01:00
120 changed files with 24248 additions and 6081 deletions
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@ -35,6 +35,9 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
+      - name: Set cargo version from tag
+        run: python .github/workflows/update_version.py
+
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
@ -66,6 +69,9 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
+      - name: Set cargo version from tag
+        run: python .github/workflows/update_version.py
+
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
@ -94,6 +100,9 @@ jobs:
        with:
          python-version: 3.x
          architecture: ${{ matrix.platform.target }}
+      - name: Set cargo version from tag
+        run: python .github/workflows/update_version.py
+
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
@ -120,6 +129,8 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
+      - name: Set cargo version from tag
+        run: python .github/workflows/update_version.py
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
@ -136,6 +147,8 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
+      - name: Set cargo version from tag
+        run: python .github/workflows/update_version.py
      - name: Build sdist
        uses: PyO3/maturin-action@v1
        with:
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -28,7 +28,7 @@ jobs:
          python-version: 3.x
      - name: Build and Install
        run: |
-          python -m pip install ".[rust,dev]"
+          python -m pip install ".[dev]"
      - name: Test
        run: |
          pytest
--- a/.github/workflows/update_version.py
+++ b/.github/workflows/update_version.py
@ -0,0 +1,42 @@
+import re
+import subprocess
+from pathlib import Path
+
+CARGO_TOML_PATH = Path("Cargo.toml")
+
+
+# Get the latest Git tag and strip the leading 'v' if present
+def get_git_version():
+    """Return the version described by the nearest Git tag, without a leading 'v'.
+
+    Runs ``git describe --tags`` in the current working directory and strips
+    surrounding whitespace from its output.
+
+    Returns:
+        str: The tag description with one leading ``v`` removed, e.g. ``0.1.2``
+        when the current commit is tagged ``v0.1.2``.
+
+    Raises:
+        RuntimeError: If ``git describe`` exits with a non-zero status, for
+            example because no tag is reachable from the current commit.
+    """
+    try:
+        # Deliberately no "--always": that flag makes git fall back to a bare
+        # abbreviated commit hash when no tag is reachable, which is not a
+        # version and would otherwise be written into Cargo.toml as-is.
+        version = subprocess.check_output(
+            ["git", "describe", "--tags"], text=True
+        ).strip()
+    except subprocess.CalledProcessError as err:
+        # Chain the original error so git's exit status stays in the traceback.
+        raise RuntimeError(
+            "Failed to get Git tag. Make sure you have at least one tag in the repository."
+        ) from err
+    return re.sub(r"^v", "", version)  # Remove leading 'v'
+
+
+# Update version in Cargo.toml
+def update_cargo_version(new_version):
+    """Rewrite the package version in Cargo.toml.
+
+    Reads the file at CARGO_TOML_PATH, replaces the first line of the form
+    ``version = "..."`` with ``new_version`` and writes the result back to the
+    same path.
+
+    Args:
+        new_version: Version string to place between the quotes.
+    """
+    cargo_toml = CARGO_TOML_PATH.read_text()
+
+    # Replace version in [package] section. count=1 limits the substitution to
+    # the first matching line so that later `version = "..."` lines (for
+    # example inside a [dependencies.<name>] table) are left untouched.
+    # Assumes [package] comes before any other table with a version key.
+    updated_toml = re.sub(
+        r'^version = "[^"]+"',
+        f'version = "{new_version}"',
+        cargo_toml,
+        count=1,
+        flags=re.MULTILINE,
+    )
+
+    CARGO_TOML_PATH.write_text(updated_toml)
+
+
+if __name__ == "__main__":
+    # Resolve the version from Git first; if that raises, Cargo.toml is never touched.
+    tag_version = get_git_version()
+    print(f"Parsed version: {tag_version}")
+
+    # Write the resolved version into Cargo.toml and report what was written.
+    update_cargo_version(tag_version)
+    print(f"Updated Cargo.toml with version: {tag_version}")
--- a/.gitignore
+++ b/.gitignore
@ -13,3 +13,11 @@ target/
 _build/
 build/
 .ipynb_checkpoints/
+dist/
+Cargo.lock
+src/python/qubed/_version.py
+*.ipynb
+cmake_build/
+tests/data/
+*.secret
+node_modules/
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -0,0 +1,17 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+  # General whitespace / end-of-file hygiene hooks.
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      # Currently disabled hooks:
+      # - id: check-yaml
+      # - id: check-added-large-files
+
+  # Ruff lint (with autofix) and formatter.
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.7
+    hooks:
+      - id: ruff
+        args: [--fix]
+      - id: ruff-format
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@ -22,6 +22,7 @@ sphinx:

 python:
  install:
+    - requirements: docs/requirements.txt
    - method: pip
      path: .
      extra_requirements:
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1,223 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 4
-
-[[package]]
-name = "autocfg"
-version = "1.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
-
-[[package]]
-name = "cfg-if"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
-
-[[package]]
-name = "heck"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
-
-[[package]]
-name = "indoc"
-version = "2.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
-
-[[package]]
-name = "itoa"
-version = "1.0.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
-
-[[package]]
-name = "libc"
-version = "0.2.169"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
-
-[[package]]
-name = "memchr"
-version = "2.7.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
-
-[[package]]
-name = "memoffset"
-version = "0.9.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
-dependencies = [
- "autocfg",
-]
-
-[[package]]
-name = "once_cell"
-version = "1.20.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e"
-
-[[package]]
-name = "portable-atomic"
-version = "1.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.93"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
-dependencies = [
- "unicode-ident",
-]
-
-[[package]]
-name = "pyo3"
-version = "0.23.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "57fe09249128b3173d092de9523eaa75136bf7ba85e0d69eca241c7939c933cc"
-dependencies = [
- "cfg-if",
- "indoc",
- "libc",
- "memoffset",
- "once_cell",
- "portable-atomic",
- "pyo3-build-config",
- "pyo3-ffi",
- "pyo3-macros",
- "unindent",
-]
-
-[[package]]
-name = "pyo3-build-config"
-version = "0.23.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1cd3927b5a78757a0d71aa9dff669f903b1eb64b54142a9bd9f757f8fde65fd7"
-dependencies = [
- "once_cell",
- "target-lexicon",
-]
-
-[[package]]
-name = "pyo3-ffi"
-version = "0.23.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dab6bb2102bd8f991e7749f130a70d05dd557613e39ed2deeee8e9ca0c4d548d"
-dependencies = [
- "libc",
- "pyo3-build-config",
-]
-
-[[package]]
-name = "pyo3-macros"
-version = "0.23.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91871864b353fd5ffcb3f91f2f703a22a9797c91b9ab497b1acac7b07ae509c7"
-dependencies = [
- "proc-macro2",
- "pyo3-macros-backend",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "pyo3-macros-backend"
-version = "0.23.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43abc3b80bc20f3facd86cd3c60beed58c3e2aa26213f3cda368de39c60a27e4"
-dependencies = [
- "heck",
- "proc-macro2",
- "pyo3-build-config",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "qubed"
-version = "0.1.2"
-dependencies = [
- "pyo3",
- "serde",
- "serde_json",
-]
-
-[[package]]
-name = "quote"
-version = "1.0.38"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
-dependencies = [
- "proc-macro2",
-]
-
-[[package]]
-name = "ryu"
-version = "1.0.19"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd"
-
-[[package]]
-name = "serde"
-version = "1.0.217"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
-dependencies = [
- "serde_derive",
-]
-
-[[package]]
-name = "serde_derive"
-version = "1.0.217"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "serde_json"
-version = "1.0.138"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949"
-dependencies = [
- "itoa",
- "memchr",
- "ryu",
- "serde",
-]
-
-[[package]]
-name = "syn"
-version = "2.0.98"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "target-lexicon"
-version = "0.12.16"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
-
-[[package]]
-name = "unicode-ident"
-version = "1.0.16"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
-
-[[package]]
-name = "unindent"
-version = "0.2.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "qubed"
-version = "0.1.2"
+version = "0.0.0"
 edition = "2021"
 repository = "https://github.com/ecmwf/qubed"

@ -8,8 +8,12 @@ repository = "https://github.com/ecmwf/qubed"
 # rsfdb = {git = "https://github.com/ecmwf/rsfdb", branch = "develop"}
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
-pyo3 = "0.23"
+pyo3 = "0.25"
+lasso = "0.7.3"
+itertools = "0.14.0"

+[package.metadata.maturin]
+version-from-git = true

 [lib]
 name = "tree_traverser"
--- a/README.md
+++ b/README.md
@ -1,6 +1,8 @@
-# Q<sup>3</sup> Quick Querying of Qubes
+# <p align="center"><img src="https://raw.githubusercontent.com/ecmwf/qubed/refs/heads/main/docs/_static/banner.svg" width="1000"></p> 
 [![Static Badge](https://github.com/ecmwf/codex/raw/refs/heads/main/Project%20Maturity/emerging_badge.svg)](https://github.com/ecmwf/codex/raw/refs/heads/main/Project%20Maturity#emerging)
 [![Docs](https://readthedocs.org/projects/qubed/badge/?version=latest)](https://qubed.readthedocs.io/en/latest/)
+[![PyPi](https://img.shields.io/pypi/v/qubed.svg)](https://pypi.org/project/qubed/)
+[![Wheel](https://img.shields.io/pypi/wheel/qubed.svg)](https://pypi.org/project/qubed/)

 Qubed provides a datastructure primitive for working with trees of DataCubes. If a normal tree looks like this:
 ```
@ -44,7 +46,7 @@ In addition to this core datastructure, this repostitory contains a collection o
 - 🌟 Implements our proposed [Datacube STAC Extension](./structured_stac.md).
 - 🛠️ Allows efficient traversal of ECMWF's datacubes.
 - Part of the implementation of this is [🌲 Tree Compressor](./tree_compresser), a **compressed tree representation** optimised for storing trees with many duplicated subtress.
- 🔗 **[Live Example](https://climate-catalogue.lumi.apps.dte.destination-earth.eu/api/stac?root=root&activity=story-nudging%2Cscenariomip&class=d1)**.
+- 🔗 **[Live Example](https://qubed.lumi.apps.dte.destination-earth.eu/api/v1/stac/climate-dt/?class=od%2Cd1&dataset=climate-dt)**.

 ---

@ -52,7 +54,7 @@ In addition to this core datastructure, this repostitory contains a collection o
 > **Web Frontend**

 - 👀 Displays data from the **STAC Server** in an intuitive user interface.
- 🌍 **[Try the Live Demo](https://climate-catalogue.lumi.apps.dte.destination-earth.eu/)**.
+- 🌍 **[Try the Live Demo](https://qubed.lumi.apps.dte.destination-earth.eu/)**.

 ---

--- a/ROADMAP.md
+++ b/ROADMAP.md
@ -0,0 +1,50 @@
+
+Initial Python Implementation
+- [x] Basic Qube datastructure
+- [x] Compression
+- [x] Set Operations (Union, Difference, Intersection...)
+- [x] Query with request
+- [x] Iteration over leaves
+- [x] Iteration over datacubes
+- [x] Command line creation from fdb list --compact
+- [ ] Set up periodic updates to climate-dt/extremes-dt again
+- [ ] Maybe also do production db?
+- [ ] Do mars list to constraints conversion
+- [ ] protobuf serialization
+
+
+Rust port
+- [ ] Initial object
+- [ ] Sort out ownership issues (one arena owned by python object)
+- [ ] Compression
+- [ ] Set Operations
+- [ ] Query with request
+- [ ] Iteration over leaves
+- [ ] Iteration over datacubes
+- [ ] Set up periodic updates to climate-dt/extremes-dt again
+
+## API
+
+Qubed will provide a core compressed tree data structure called a Qube with:
+
+Methods to convert to and from:
+- [x] A human readable representation like those seen above.
+- [x] An HTML version where subtrees can be collapsed.
+- [ ] A compact protobuf-based binary format
+- [x] Nested python dictionaries or JSON
+- [/] The output of [fdb list](https://confluence.ecmwf.int/display/FDB/fdb-list)
+- [ ] [mars list][mars list]
+- [ ] [constraints.json][constraints]
+
+[constraints]: https://object-store.os-api.cci2.ecmwf.int/cci2-prod-catalogue/resources/reanalysis-era5-land/constraints_a0ae5b42d67869674e13fba9fd055640bcffc37c24578be1f465d7d5ab2c7ee5.json
+[mars list]: https://git.ecmwf.int/projects/CDS/repos/cads-forms-reanalysis/browse/reanalysis-era5-single-levels/gecko-config/mars.list?at=refs%2Fheads%2Fprod
+
+Useful algorithms:
+- [x] Compression
+- [/] Union/Intersection/Difference
+
+Performant Membership Queries
+- Identifier membership
+- Datacube query (selection)
+
+Metadata Storage
--- a/chart/Chart.yaml
+++ b/chart/Chart.yaml
@ -1,5 +1,5 @@
 apiVersion: v2
-name: stac-server
+name: qubed
 description: A Helm chart for the STAC Server with frontend, STAC API and caching service.
 type: application
 version: 0.1.0
--- a/chart/templates/ingress.yaml
+++ b/chart/templates/ingress.yaml
@ -10,7 +10,7 @@ spec:
      http:
        paths:
        {{- if .Values.stacServer.enabled }}
-          - path: /api
+          - path: /
            pathType: Prefix
            backend:
              service:
@ -18,21 +18,8 @@ spec:
                port:
                  number: {{ .Values.stacServer.servicePort }}
        {{- end }}
-        {{- if .Values.webQueryBuilder.enabled }}
-          - path: /
-            pathType: Prefix
-            backend:
-              service:
-                name: web-query-builder
-                port:
-                  number: {{ .Values.webQueryBuilder.servicePort }}
-        {{- end }}
  tls:
    - hosts:
        - {{ .Values.ingress.hostname }}
      secretName: {{ .Values.ingress.tlsSecretName }}
 {{- end }}
-
-
-
-
--- a/chart/templates/redis-deployment.yaml
+++ b/chart/templates/redis-deployment.yaml
@ -1,28 +0,0 @@
-# templates/redis-deployment.yaml
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: redis
-spec:
-  selector:
-    matchLabels:
-      app: redis
-  template:
-    metadata:
-      labels:
-        app: redis
-    spec:
-      containers:
-        - name: redis
-          image: "redis:alpine"
-          command: ["redis-server", "--appendonly", "yes"]
-          ports:
-            - containerPort: {{ .Values.redis.servicePort }}
-          # volumeMounts:
-          #   - mountPath: /data
-          #     name: redis-data
-      # volumes:
-      #   - name: redis-data
-      #     persistentVolumeClaim:
-      #       claimName: redis-data
--- a/chart/templates/redis-pvc.yaml
+++ b/chart/templates/redis-pvc.yaml
@ -1,14 +0,0 @@
-# templates/redis-pvc.yaml
-
-{{- if .Values.redis.pvc.enabled }}
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: redis-data
-spec:
-  accessModes: {{ .Values.redis.pvc.accessModes }}
-  resources:
-    requests:
-      storage: {{ .Values.redis.pvc.size }}
-  storageClassName: {{ .Values.redis.pvc.storageClassName | quote }}
-{{- end }}
--- a/chart/templates/redis-service.yaml
+++ b/chart/templates/redis-service.yaml
@ -1,11 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: redis
-spec:
-  selector:
-    app: redis
-  ports:
-    - protocol: TCP
-      port: {{ .Values.redis.servicePort }}
-      targetPort: {{ .Values.redis.servicePort }}
--- a/chart/templates/stac-server-configmap.yaml
+++ b/chart/templates/stac-server-configmap.yaml
@ -1,11 +0,0 @@
-# apiVersion: v1
-# kind: ConfigMap
-# metadata:
-#   name: stack-server
-# data:
-#   file1.txt: |-
-#     {{ .Files.Get "files/file1.txt" | nindent 2 }}
-#   file2.txt: |-
-#     {{ .Files.Get "files/file2.txt" | nindent 2 }}
-#   file3.txt: |-
-#     {{ .Files.Get "files/file3.txt" | nindent 2 }}
--- a/chart/templates/stac-server-deployment.yaml
+++ b/chart/templates/stac-server-deployment.yaml
@ -5,7 +5,7 @@ kind: Deployment
 metadata:
  name: stac-server
 spec:
-  replicas: 1  # Adjust as needed
+  replicas: {{ .Values.stacServer.replicas }}
  selector:
    matchLabels:
      app: stac-server
@ -14,20 +14,30 @@ spec:
      labels:
        app: stac-server
    spec:
-      initContainers:
-        - name: wait-for-redis
-          image: busybox
-          command:
-            [
-              'sh', '-c',
-              'until nc -z -v -w30 {{ .Values.stacServer.environment.REDIS_HOST }} {{ .Values.redis.service.port }}; do echo "Waiting for Redis..."; sleep 5; done;'
-            ]
      containers:
        - name: stac-server
          image: "{{ .Values.stacServer.image.repository }}:{{ .Values.stacServer.image.tag }}"
          imagePullPolicy: {{ .Values.stacServer.image.pullPolicy }}
          env:
-            - name: REDIS_HOST
-              value: "{{ .Values.stacServer.environment.REDIS_HOST }}"
+            - name: API_KEY
+              valueFrom:
+                secretKeyRef:
+                    name: api-key
+                    key: API_KEY
+            - name: API_URL
+              value: "https://{{ .Values.ingress.hostname }}/api/v1/"
          ports:
            - containerPort: {{ .Values.stacServer.servicePort }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: stac-server
+spec:
+  selector:
+    app: stac-server
+  ports:
+    - protocol: TCP
+      port: {{ .Values.stacServer.servicePort }}
+      targetPort: {{ .Values.stacServer.servicePort }}
+  type: ClusterIP
--- a/chart/templates/stac-server-service.yaml
+++ b/chart/templates/stac-server-service.yaml
@ -1,12 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: stac-server
-spec:
-  selector:
-    app: stac-server
-  ports:
-    - protocol: TCP
-      port: {{ .Values.stacServer.servicePort }}
-      targetPort: {{ .Values.stacServer.servicePort }}
-  type: ClusterIP
--- a/chart/templates/web-query-builder-deployment.yaml
+++ b/chart/templates/web-query-builder-deployment.yaml
@ -1,37 +0,0 @@
---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: web-query-builder
-spec:
-  replicas: {{ .Values.webQueryBuilder.replicas }}
-  selector:
-    matchLabels:
-      app: web-query-builder
-  template:
-    metadata:
-      labels:
-        app: web-query-builder
-    spec:
-      containers:
-        - name: web-query-builder
-          image: "{{ .Values.webQueryBuilder.image.repository }}:{{ .Values.webQueryBuilder.image.tag }}"
-          imagePullPolicy: {{ .Values.webQueryBuilder.image.pullPolicy }}
-          env:
-            - name: API_HOST
-              value: stac-server
-          ports:
-            - containerPort: {{ .Values.webQueryBuilder.servicePort }}
---
-apiVersion: v1
-kind: Service
-metadata:
-  name: web-query-builder
-spec:
-  selector:
-    app: web-query-builder
-  ports:
-    - protocol: TCP
-      port: {{ .Values.webQueryBuilder.servicePort }}
-      targetPort: {{ .Values.webQueryBuilder.servicePort }}
-  type: ClusterIP
--- a/chart/values.yaml
+++ b/chart/values.yaml
@ -1,33 +1,13 @@
-redis:
-  servicePort: 6379
-  pvc:
-    enabled: true
-    storageClassName: ""
-    accessModes:
-      - ReadWriteOnce
-    size: 1Gi
-  service:
-    port: 6379
-
 stacServer:
  enabled: true
+  replicas: 1
  image:
    repository: "eccr.ecmwf.int/qubed/stac_server"
    tag: "latest"
    pullPolicy: Always
  servicePort: 80
-  environment:
-    REDIS_HOST: "redis"
-
-webQueryBuilder:
-  enabled: true
-  image:
-    repository: "eccr.ecmwf.int/qubed/web_query_builder"
-    tag: "latest"
-    pullPolicy: Always
-  servicePort: 80

 ingress:
  enabled: True
  tlsSecretName: "lumi-wildcard-tls"
-  hostname: "climate-catalogue.lumi.apps.dte.destination-earth.eu"
+  hostname: "qubed.lumi.apps.dte.destination-earth.eu"
--- a/compose.yaml
+++ b/compose.yaml
@ -1,16 +1,5 @@

 services:
-  # redis server holds the catalog data blob
-  redis:
-    image: redis:alpine
-    container_name: redis
-    command: ["redis-server", "--appendonly", "yes"]
-    volumes:
-      - ./redis-data:/data
-    ports:
-      - "6379:6379"
-    restart: always
-
  # STAC Server
  stac_server:
    # image: stac-server:latest
@ -20,27 +9,24 @@ services:
      dockerfile: Dockerfile
      target: stac_server
    ports:
-      - "8124:8080"
+      - "8124:80"
    environment:
-      - REDIS_HOST=redis
      - CONFIG_DIR=/config
    volumes:
      - ./stac_server:/code/stac_server
-      - ./TreeTraverser:/code/TreeTraverser
-    # restart: always
-
-  # web_query_builder:
-  #   # image: web_query_builder:latest
-  #   container_name: web_query_builder
-  #   build:
-  #     context: .
-  #     dockerfile: Dockerfile
-  #     target: web_query_builder
-  #   ports:
-  #     - "8125:8080"
-  #   environment:
-  #     - CONFIG_DIR=/config
-  #   volumes:
-  #     - ./web_query_builder:/code/web_query_builder
    # restart: always

+  web_query_builder:
+    # image: web_query_builder:latest
+    container_name: web_query_builder
+    build:
+      context: .
+      dockerfile: Dockerfile
+      target: web_query_builder
+    ports:
+      - "8125:80"
+    environment:
+      - API_URL=http://127.0.0.1:8124/api/v1/stac/climate-dt
+    volumes:
+      - ./web_query_builder:/code/web_query_builder
+    restart: always
--- a/config/config-climate-dt.yaml
+++ b/config/config-climate-dt.yaml
@ -0,0 +1,6 @@
+---
+type: remote
+host: databridge-prod-catalogue3-ope.ewctest.link
+port: 10000
+engine: remote
+store: remote
--- a/config/config-extremes-dt.yaml
+++ b/config/config-extremes-dt.yaml
@ -0,0 +1,6 @@
+---
+type: remote
+host: databridge-prod-catalogue1-ope.ewctest.link
+port: 10000
+engine: remote
+store: remote
--- a/config/extremes-dt/language.yaml
+++ b/config/extremes-dt/language.yaml
--- a/config/climate-dt/language.yaml
+++ b/config/climate-dt/language.yaml
--- a/config/language/paramids.yaml
+++ b/config/language/paramids.yaml
--- a/config/local/language.yaml
+++ b/config/local/language.yaml
--- a/29
+++ b/29
@ -24,32 +24,13 @@ FROM base AS stac_server
 COPY stac_server/requirements.txt /code/requirements.txt
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

-# Todo: don't embed this here, mount them at runtime
-# ENV CONFIG_DIR=/config/
-# COPY config/destinE/config.yaml /config/config.yaml
-# COPY config/destinE/schema /config/schema
-# COPY config/destinE/language.yaml /config/language.yaml
+COPY ./src /code/qubed/src
+COPY ./pyproject.toml /code/qubed/
+COPY ./Cargo.toml /code/qubed/
+COPY ./README.md /code/qubed/

-COPY ./tree_compresser /code/tree_compresser 
-
-# Clone the rsfdb and rsfindlibs repos manually because they're private
-
-# RUN --mount=type=ssh git clone ssh://git@github.com/ecmwf/rsfdb.git
-# RUN --mount=type=ssh git clone ssh://git@github.com/ecmwf/rsfindlibs.git
-COPY stac_server/deps/rsfdb /code/rsfdb
-COPY stac_server/deps/rsfindlibs /code/rsfindlibs
-
-RUN pip install --no-cache-dir -e /code/tree_compresser 
+RUN pip install --no-cache-dir -e /code/qubed
 COPY ./stac_server /code/stac_server

 WORKDIR /code/stac_server
 CMD ["fastapi", "dev", "main.py", "--proxy-headers", "--port", "80", "--host", "0.0.0.0"]
-
-FROM base AS web_query_builder
-
-COPY web_query_builder/requirements.txt /code/requirements.txt
-RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
-
-COPY web_query_builder /code/web_query_builder
-WORKDIR /code/web_query_builder
-CMD ["flask", "run", "--host", "0.0.0.0", "--port", "80"]
--- a/docs/_static/banner.svg
+++ b/docs/_static/banner.svg
@ -0,0 +1,159 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   width="1000"
+   height="200"
+   viewBox="0 0 264.58333 52.916666"
+   version="1.1"
+   id="svg5"
+   xml:space="preserve"
+   inkscape:version="1.2.2 (b0a84865, 2022-12-01)"
+   sodipodi:docname="banner.svg"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:svg="http://www.w3.org/2000/svg"><sodipodi:namedview
+     id="namedview7"
+     pagecolor="#ffffff"
+     bordercolor="#000000"
+     borderopacity="0.25"
+     inkscape:showpageshadow="2"
+     inkscape:pageopacity="0.0"
+     inkscape:pagecheckerboard="0"
+     inkscape:deskcolor="#d1d1d1"
+     inkscape:document-units="mm"
+     showgrid="false"
+     inkscape:zoom="1.4221154"
+     inkscape:cx="509.80392"
+     inkscape:cy="23.908046"
+     inkscape:window-width="2665"
+     inkscape:window-height="1000"
+     inkscape:window-x="96"
+     inkscape:window-y="35"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g330" /><defs
+     id="defs2"><rect
+       x="641.41612"
+       y="32.816639"
+       width="73.588826"
+       height="29.833308"
+       id="rect2775" /><rect
+       x="500.20513"
+       y="263.52755"
+       width="244.63313"
+       height="143.19988"
+       id="rect2749" /><rect
+       x="467.38849"
+       y="331.14972"
+       width="258.55534"
+       height="132.261"
+       id="rect2743" /><rect
+       x="80.859469"
+       y="61.833711"
+       width="299.65568"
+       height="114.15454"
+       id="rect242" /><rect
+       x="61.833711"
+       y="66.590151"
+       width="313.925"
+       height="114.15454"
+       id="rect236" /></defs><g
+     inkscape:label="Layer 1"
+     inkscape:groupmode="layer"
+     id="layer1"><text
+       xml:space="preserve"
+       transform="scale(0.26458333)"
+       id="text234"
+       style="font-weight:500;font-size:20px;line-height:1.2;font-family:Futura;-inkscape-font-specification:'Futura, Medium';white-space:pre;shape-inside:url(#rect236);display:inline;fill:#000000;stroke-width:0.926667;stroke-miterlimit:4.9;stroke-dasharray:2.78, 0.926667;stroke-opacity:0.40146" /><text
+       xml:space="preserve"
+       transform="scale(0.26458333)"
+       id="text240"
+       style="font-weight:500;font-size:20px;line-height:1.2;font-family:Futura;-inkscape-font-specification:'Futura, Medium';white-space:pre;shape-inside:url(#rect242);display:inline;fill:#000000;stroke-width:0.926667;stroke-miterlimit:4.9;stroke-dasharray:2.78, 0.926667;stroke-opacity:0.40146" /><text
+       xml:space="preserve"
+       transform="scale(0.26458333)"
+       id="text2741"
+       style="font-style:normal;font-variant:normal;font-weight:500;font-stretch:normal;font-size:20px;line-height:1.15;font-family:Futura;-inkscape-font-specification:'Futura, Medium';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect2743);display:inline;fill:#000000;stroke-width:0.926667;stroke-miterlimit:4.9;stroke-dasharray:2.78, 0.926667;stroke-opacity:0.40146" /><text
+       xml:space="preserve"
+       transform="scale(0.26458333)"
+       id="text2747"
+       style="font-style:normal;font-variant:normal;font-weight:500;font-stretch:normal;font-size:20px;line-height:1.15;font-family:Futura;-inkscape-font-specification:'Futura, Medium';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect2749);display:inline;fill:#000000;stroke-width:0.926667;stroke-miterlimit:4.9;stroke-dasharray:2.78, 0.926667;stroke-opacity:0.40146" /><text
+       xml:space="preserve"
+       transform="scale(0.26458333)"
+       id="text2773"
+       style="font-style:normal;font-variant:normal;font-weight:500;font-stretch:normal;font-size:26.6667px;line-height:1.15;font-family:Futura;-inkscape-font-specification:'Futura, Medium';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect2775);display:inline;fill:#000000;stroke-width:0.926667;stroke-miterlimit:4.9;stroke-dasharray:2.78, 0.926667;stroke-opacity:0.40146" /><g
+       id="g349"
+       transform="translate(-5.8208336)"><text
+         xml:space="preserve"
+         style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;line-height:1.15;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;stroke-width:0.245181;stroke-miterlimit:4.9;stroke-dasharray:0.735542, 0.245181;stroke-opacity:0.40146"
+         x="114.48351"
+         y="14.228302"
+         id="text2763"><tspan
+           sodipodi:role="line"
+           id="tspan2761"
+           style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.245181"
+           x="114.48351"
+           y="14.228302">root</tspan><tspan
+           sodipodi:role="line"
+           style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.245181"
+           x="114.48351"
+           y="21.701376"
+           id="tspan2765">├── class=cd, stream=lwda/oai, param=1/2/3</tspan><tspan
+           sodipodi:role="line"
+           style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.245181"
+           x="114.48351"
+           y="29.17445"
+           id="tspan2767">├── class=od, expver=1/2, param=1/2</tspan><tspan
+           sodipodi:role="line"
+           style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.245181"
+           x="114.48351"
+           y="36.647522"
+           id="tspan2771">├── class=rd, param=1/2/3</tspan><tspan
+           sodipodi:role="line"
+           style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.245181"
+           x="114.48351"
+           y="44.120598"
+           id="tspan2769">└── ...</tspan></text><g
+         id="g330"><text
+           xml:space="preserve"
+           style="font-weight:500;font-size:14.1111px;line-height:0;font-family:Futura;-inkscape-font-specification:'Futura, Medium';white-space:pre;inline-size:112.115;display:inline;fill:#000000;stroke-width:0.245181;stroke-miterlimit:4.9;stroke-dasharray:0.735542, 0.245181;stroke-opacity:0.40146"
+           x="5.439929"
+           y="17.022402"
+           id="text248"
+           transform="translate(0,-1.5875)"><tspan
+             x="5.439929"
+             y="17.022402"
+             id="tspan532"><tspan
+               style="font-size:12.3472px;line-height:1.2"
+               id="tspan530">Qube</tspan></tspan></text><text
+           xml:space="preserve"
+           style="font-style:normal;font-variant:normal;font-weight:500;font-stretch:normal;font-size:5.29167px;line-height:1.15;font-family:Futura;-inkscape-font-specification:'Futura, Medium';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;inline-size:87.6248;display:inline;fill:#000000;stroke-width:0.245181;stroke-miterlimit:4.9;stroke-dasharray:0.735542, 0.245181;stroke-opacity:0.40146"
+           x="4.209815"
+           y="40.519432"
+           id="text2755"
+           transform="translate(1.744648,-4.9844494)"><tspan
+             x="4.209815"
+             y="40.519432"
+             id="tspan534">1. A data structure for efficiently </tspan><tspan
+             x="4.209815"
+             y="46.604852"
+             id="tspan536">representing and querying complex </tspan><tspan
+             x="4.209815"
+             y="52.690271"
+             id="tspan538">tree-like datacubes.</tspan></text><text
+           xml:space="preserve"
+           style="font-style:normal;font-variant:normal;font-weight:500;font-stretch:normal;font-size:5.29167px;line-height:1.15;font-family:Futura;-inkscape-font-specification:'Futura, Medium';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;stroke-width:0.245181;stroke-miterlimit:4.9;stroke-dasharray:0.735542, 0.245181;stroke-opacity:0.40146"
+           x="5.4673572"
+           y="26.586193"
+           id="text2759"><tspan
+             sodipodi:role="line"
+             id="tspan2757"
+             style="font-style:normal;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.245181"
+             x="5.4673572"
+             y="26.586193">[kjuːb] <tspan
+   style="font-style:italic;font-variant:normal;font-weight:500;font-stretch:normal;font-size:6.35px;font-family:Futura;-inkscape-font-specification:'Futura, Medium Italic';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal"
+   id="tspan495">noun</tspan></tspan></text><path
+           style="fill:#000000;stroke:#000000;stroke-width:0.445;stroke-miterlimit:4.9;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+           d="M 6.0516036,18.417924 H 92.221177"
+           id="path2833"
+           sodipodi:nodetypes="cc" /></g></g></g></svg>
--- a/docs/api.md
+++ b/docs/api.md
@ -1,14 +0,0 @@
-# API 
-
-## Set Operations
-
-```{code-cell} python3
-from qubed import Qube
-
-A = Qube.from_dict({
-    "a=1": {"b": {1, 2, 3}, "c": {1}},
-    "a=2": {"b": {1, 2, 3}, "c": {1}},
-})
-A
-```
-
--- a/docs/autobuild.sh
+++ b/docs/autobuild.sh
@ -1 +1,5 @@
+# cd to the directory containing this script; abort if it cannot be resolved or entered
+parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P ) || exit 1
+cd "$parent_path" || exit 1
+
 sphinx-autobuild . _build
--- a/docs/background.md
+++ b/docs/background.md
@ -0,0 +1,87 @@
+---
+jupytext:
+  text_representation:
+    extension: .md
+    format_name: myst
+    format_version: 0.13
+    jupytext_version: 1.16.4
+---
+# Datacubes, Trees and Compressed trees
+
+This section contains a bit more of an introduction to the datastructure; feel free to skip to the [Quickstart](quickstart.md). See the [datacube spec](https://github.com/ecmwf/datacube-spec) for even more detail and the canonical source of truth on the matter.
+
+Qubed is primarily geared towards dealing with datafiles uniquely labeled by sets of key value pairs. We'll call a set of key value pairs that uniquely labels some data an `identifier`. Here's an example:
+
+```python
+{
+ 'class': 'd1',
+ 'dataset': 'climate-dt',
+ 'generation': '1',
+ 'date': '20241102',
+ 'resolution': 'high',
+ 'time': '0000',
+}
+```
+
+Unfortunately, we have more than one data file. If we are lucky, the set of identifiers that currently exist might form a dense datacube that we could represent like this:
+
+```python
+{
+ 'class': ['d1', 'd2'],
+ 'dataset': 'climate-dt',
+ 'generation': ['1','2','3'],
+ 'model': 'icon',
+ 'date': ['20241102','20241103'],
+ 'resolution': ['high','low'],
+ 'time': ['0000', '0600', '1200', '1800'],
+}
+```
+
+with the property that any particular choice for a value for any key will correspond to a datafile that exists. So this object represents `2x1x3x1x2x2x4 = 96` different datafiles.
+
+To save space I will also represent this same thing like this:
+```
+- class=d1/d2, dataset=climate-dt, generation=1/2/3, ..., time=0000/0600/1200/1800
+```
+
+Unfortunately, we are not lucky and our datacubes are not always dense. In this case we might instead represent which data exists using a tree:
+
+```{code-cell} python3
+from qubed import Qube
+
+q = Qube.from_dict({
+    "class=od" : {
+        "expver=0001": {"param=1":{}, "param=2":{}},
+        "expver=0002": {"param=1":{}, "param=2":{}},
+    },
+    "class=rd" : {
+        "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
+        "expver=0002": {"param=1":{}, "param=2":{}},
+    },
+})
+
+# depth controls how much of the tree is open when rendered as html.
+q.html(depth=100)
+```
+
+But it's clear that the above tree contains a lot of redundant information. Many of the subtrees are identical for example. Indeed in practice a lot of our data turns out to be 'nearly dense' in that it contains many dense datacubes within it.
+
+There are many valid ways one could compress this tree. If we add the restriction that no identical key=value pairs can be adjacent then here is the compressed tree we might get:
+
+```{code-cell} python3
+q.compress()
+```
+
+```{warning}
+Without the above restriction we could, for example, have:
+
+    root
+    ├── class=od, expver=0001/0002, param=1/2
+    └── class=rd
+        ├── expver=0001, param=3
+        └── expver=0001/0002, param=1/2
+
+but we do not allow this because it would mean we would have to take multiple branches in order to find data with `expver=0001`.
+```
+
+What we have now is a tree of dense datacubes which represents a single larger sparse datacube in a more compact manner. For want of a better word we'll call it a Qube.
--- a/docs/cmd.md
+++ b/docs/cmd.md
@ -0,0 +1,44 @@
+# Command Line Usage
+
+```bash
+fdb list class=rd,expver=0001,... | qubed convert --from=fdb --to=text
+
+fdb list --minimum-keys=class class=d1,dataset=climate-dt --config prod_remoteFDB.yaml  | qubed convert --from=fdb --to=text
+
+```
+
+`--from` options include:
+* `fdb`
+
+`--to` options include:
+* `text`
+* `html`
+* `json`
+
+use `--input` and `--output` to specify input and output files respectively.
+
+
+There's some handy test data in the `tests/data` directory. For example:
+```bash
+gzip -dc tests/data/fdb_list_compact.gz| qubed convert --from=fdb --to=text --output=qube.txt
+gzip -dc tests/data/fdb_list_porcelain.gz| qubed convert --from=fdb --to=json --output=qube.json
+gzip -dc tests/data/fdb_list_compact.gz | qubed convert --from=fdb --to=html --output=qube.html
+
+# Operational data stream=oper/wave/enfo/waef
+fdb list class=od,expver=0001,date=0,stream=oper --compact >> operational_compact.txt
+cat operational_compact.txt | qubed convert --from=fdb --to=text --output=operational.txt
+```
+
+
+
+## Todo
+
+--from for
+* `protobuf`
+* `marslist`
+* `constraints`
+
+--to for
+* `json`
+* `datacubes`
+* `constraints`
--- a/docs/conf.py
+++ b/docs/conf.py
@ -6,10 +6,10 @@
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

-project = 'qubed'
-copyright = '2025, Tom Hodson (ECMWF)'
-author = 'Tom Hodson (ECMWF)'
-release = '0.1.0'
+project = "qubed"
+copyright = "2025, Tom Hodson (ECMWF)"
+author = "Tom Hodson (ECMWF)"
+release = "0.1.0"

 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
@ -20,8 +20,8 @@ extensions = [
    "myst_nb",  # For parsing markdown
 ]

-templates_path = ['_templates']
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', "jupyter_execute"]
+templates_path = ["_templates"]
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "jupyter_execute"]


 source_suffix = {
@ -33,3 +33,7 @@ source_suffix = {

 html_theme = "sphinx_rtd_theme"
 html_static_path = ["_static"]
+
+myst_enable_extensions = [
+    "attrs_inline",
+]
--- a/docs/development.md
+++ b/docs/development.md
@ -1,6 +1,19 @@
 # Development

+To install the latest stable release from PyPI (recommended):
+
+```bash
+pip install qubed
+```
+
+To install the latest version from github (requires rust):
+
+```bash
+pip install qubed@git+https://github.com/ecmwf/qubed.git@main
+```
+
 To build the develop branch from source install a rust toolchain and pip install maturin then run:
+
 ```
 git clone -b develop git@github.com:ecmwf/qubed.git
 cd qubed
--- a/docs/fiab.md
+++ b/docs/fiab.md
@ -0,0 +1,137 @@
+---
+jupytext:
+  text_representation:
+    extension: .md
+    format_name: myst
+    format_version: 0.13
+    jupytext_version: 1.16.4
+---
+
+# Fiab
+
+## Model Selection
+
+This is a demo of using qubed to select from a set of forecast models that each produce a set of output variables.
+
+First let's construct some models represented as qubes:
+
+```{code-cell} python3
+from qubed import Qube
+model_1 = Qube.from_datacube({
+        "levtype": "pl",
+        "param" : ["q", "t", "u", "v", "w", "z"],
+        "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
+    }) | Qube.from_datacube({
+        "levtype": "sfc",
+        "param" : ["10u", "10v", "2d", "2t", "cp", "msl", "skt", "sp", "tcw", "tp"],
+})
+
+model_1 = "model=1" / ("frequency=6h" / model_1)
+model_1
+```
+
+This is the most complete model. Now let's do one with fewer variables and levels:
+
+```{code-cell} python3
+model_2 = Qube.from_datacube({
+        "levtype": "pl",
+        "param" : ["q", "t"],
+        "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
+    }) | Qube.from_datacube({
+        "levtype": "sfc",
+        "param" : ["2t", "cp", "msl"],
+})
+model_2 = "model=2" / ("frequency=continuous" / model_2)
+```
+
+```{code-cell} python3
+model_3 = Qube.from_datacube({
+        "levtype": "pl",
+        "param" : ["q", "t"],
+        "level" : [100, 200, 300, 400, 50, 850, 500, 150, 600, 250, 700, 925, 1000],
+    }) | Qube.from_datacube({
+        "levtype": "sfc",
+        "param" : ["2t", "cp", "msl"],
+})
+model_3 = "model=3" / ("frequency=6h" / model_3)
+model_3
+```
+
+
+Now we can combine the three models into a single qube:
+
+```{code-cell} python3
+all_models = model_1 | model_2 | model_3
+all_models
+```
+
+Now we can perform queries over the models. We can get all models that produce 2m temperature:
+```{code-cell} python3
+all_models.select({
+    "param" : "2t",
+})
+```
+
+Filter on both parameter and frequency:
+
+```{code-cell} python3
+all_models.select({
+    "param" : "2t",
+    "frequency": "continuous",
+})
+```
+
+Find all models that have some overlap with this set of parameters:
+
+```{code-cell} python3
+all_models.select({
+    "param" : ["q", "t", "u", "v"],
+})
+```
+
+## Choosing a set of models based on the requested parameter set
+
+```{code-cell} python3
+all_models.select({
+    "param" : ["q", "t", "u", "v"],
+    "frequency": "6h",
+})
+```
+
+## Using WildCards
+
+```{code-cell} python3
+daily_surface_means = Qube.from_datacube({
+    "model": "*",
+    "frequency": "*",
+    "levtype": "sfc",
+    "param": "*",
+})
+all_models & daily_surface_means
+```
+
+```{code-cell} python3
+
+daily_level_means = Qube.from_datacube({
+    "model": "*",
+    "frequency": "*",
+    "levtype": "pl",
+    "param": "*",
+    "level": "*"
+})
+all_models & daily_level_means
+```
+
+```{code-cell} python3
+daily_level_mean_products = all_models & daily_surface_means
+for i, identifier in enumerate(daily_level_mean_products.leaves()):
+    print(identifier)
+    if i > 10:
+        print("...")
+        break
+
+```
+
+<!-- ## Choosing the fewest models needed to cover the requested parameter set -->
+
+<!-- ```{code-cell} python3 -->
--- a/docs/index.md
+++ b/docs/index.md
@ -12,119 +12,42 @@ jupytext:
 ```{toctree}
 :maxdepth: 1
 quickstart.md
-api.md
 development.md
+background.md
 algorithms.md
+fiab.md
+cmd.md
 ```

-# Datacubes, Trees and Compressed trees
-
-This first part is essentially a abridged version of the [datacube spec](https://github.com/ecmwf/datacube-spec), see that document for more detail and the canonical source of truth on the matter.
-
-Qubed is primarily geared towards dealing with datafiles uniquely labeled by sets of key value pairs. We'll call a set of key value pairs that uniquely labels some data an `identifier`. Here's an example:
-
-```python
-{
- 'class': 'd1',
- 'dataset': 'climate-dt',
- 'generation': '1',
- 'date': '20241102',
- 'resolution': 'high',
- 'time': '0000',
-}
+Qubed provides a datastructure primitive for working with trees of DataCubes. If a normal tree looks like this:
+```
+root
+├── class=od
+│   ├── expver=0001
+│   │   ├── param=1
+│   │   └── param=2
+│   └── expver=0002
+│       ├── param=1
+│       └── param=2
+└── class=rd
+    ├── expver=0001
+    │   ├── param=1
+    │   ├── param=2
+    │   └── param=3
+    └── expver=0002
+        ├── param=1
+        └── param=2
 ```

-Unfortunately, we have more than one data file. If we are lucky, the set of identifiers that current exists might form a dense datacube that we could represent like this:
-
-```python
-{
- 'class': ['d1', 'd2'],
- 'dataset': 'climate-dt',
- 'generation': ['1','2','3'],
- 'model': 'icon',
- 'date': ['20241102','20241103'],
- 'resolution': ['high','low'],
- 'time': ['0000', '0600', '1200', '1800'],
-}
+A compressed view of the same set would be:
 ```
-
-with the property that any particular choice for a value for any key will correspond to datafile that exists. So this object represents `2x1x3x1x2x2x4 = 96` different datafiles. 
-
-To save space I will also represent this same thing like this:
-```
- class=d1/d2, dataset=climate-dt, generation=1/2/3, ..., time=0000/0600/1200/1800
-```
-
-Unfortunately, we are not lucky and our datacubes are not always dense. In this case we might instead represent which data exists using a tree:
-
-```{code-cell} python3
-from qubed import Qube
-
-q = Qube.from_dict({
-    "class=od" : {
-        "expver=0001": {"param=1":{}, "param=2":{}},
-        "expver=0002": {"param=1":{}, "param=2":{}},
-    },
-    "class=rd" : {
-        "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
-        "expver=0002": {"param=1":{}, "param=2":{}},
-    },
-})
-
-# depth controls how much of the tree is open when rendered as html.
-q.html(depth=100)
-```
-
-But it's clear that the above tree contains a lot of redundant information. Many of the subtrees are identical for example. Indeed in practice a lot of our data turns out to be 'nearly dense' in that it contains many dense datacubes within it.
-
-There are many valid ways one could compress this tree. If we add the restriction that no identical key=value pairs can be adjacent then here is the compressed tree we might get:
-
-```{code-cell} python3
-q.compress()
-````
-
-```{warning}
-Without the above restriction we could, for example, have:
-
 root
 ├── class=od, expver=0001/0002, param=1/2
 └── class=rd
-        ├── expver=0001, param=3
-        └── expver=0001/0002, param=1/2
-
-but we do not allow this because it would mean we would have to take multiple branches in order to find data with `expver=0001`.
+    ├── expver=0001, param=1/2/3
+    └── expver=0002, param=1/2
 ```

-What we have now is a tree of dense datacubes which represents a single larger sparse datacube in a more compact manner. For want of a better word we'll call it a Qube.
-
-
-## API
-
-Qubed will provide a core compressed tree data structure called a Qube  with:
-
-Methods to convert to and from:
- [x] A human readable representation like those seen above.
- [x] An HTML version where subtrees can be collapsed.
- [ ] An compact protobuf-based binary format
- [x] Nested python dictionaries or JSON
- [/] The output of [fdb list](https://confluence.ecmwf.int/display/FDB/fdb-list)
- [ ] [mars list][mars list]
- [ ] [constraints.json][constraints]
-
-[constraints]: https://object-store.os-api.cci2.ecmwf.int/cci2-prod-catalogue/resources/reanalysis-era5-land/constraints_a0ae5b42d67869674e13fba9fd055640bcffc37c24578be1f465d7d5ab2c7ee5.json
-[mars list]: https://git.ecmwf.int/projects/CDS/repos/cads-forms-reanalysis/browse/reanalysis-era5-single-levels/gecko-config/mars.list?at=refs%2Fheads%2Fprod
-
-Useful algorithms:
- [x] Compression
- [/] Union/Intersection/Difference
-
-Performant Membership Queries
- Identifier membership
- Datacube query (selection)
-
-Metadata Storage
-
-
-
-
+Qubed provides a datastructure that represents this compressed cube we call a Qube. It defines all the algorithms you would expect such as intersection/union/difference, compression, search, transformation and filtering.

+To get a little more background on the motivation and structure of a Qube go to [Background](background.md); for a more hands-on intro, go to [Quickstart](quickstart.md).
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@ -8,18 +8,54 @@ jupytext:
 ---
 # Quickstart

-## Installation
-```bash
-pip install qubed
+First install qubed with `pip install qubed`. Now, let's dive in with a real world dataset from the [Climate DT](https://destine.ecmwf.int/climate-change-adaptation-digital-twin-climate-dt/). We'll pull a prebuilt qube from github and render it in its default HTML representation.
+
+```{code-cell} python3
+import requests
+from qubed import Qube
+climate_dt = Qube.from_json(requests.get("https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json())
+climate_dt.html(depth=1)
 ```

-## Usage
-Make an uncompressed qube:
+Click the arrows to expand and drill down deeper into the data.
+
+```{note}
+There is currently a simple Qube web browser hosted [here](https://qubed.lumi.apps.dte.destination-earth.eu/). Browse that and copy the 'Example Qube Code' to download a Qube representing the selection at that point. You'll get something like `Qube.from_json(requests.get("https://qubed.lumi.apps.dte.destination-earth.eu/api/v1/select/climate-dt/?").json())`{l=python}
+```
+
+Fundamentally a Qube represents a set of identifiers, each of which is a set of key value pairs. Here's one leaf in the Climate DT dataset:
+
+```{code-cell} python3
+next(climate_dt.leaves())
+```
+
+We can look at the set of values each key can take:
+```{code-cell} python3
+axes = climate_dt.axes()
+for key, values in axes.items():
+    print(f"{key} : {list(sorted(values))[:10]}")
+```
+
+This dataset isn't dense, you can't choose any combination of the above key value pairs, but it does contain many dense datacubes. Hence it makes sense to store and process the set as a tree of dense datacubes, which is what a Qube is. For a sense of scale, this dataset contains about 8 million distinct datasets but only contains a few hundred unique nodes.
+
+```{code-cell} python3
+import objsize
+print(f"""
+Distinct datasets: {climate_dt.n_leaves}
+Number of nodes in the tree: {climate_dt.n_nodes}
+Number of dense datacubes within this qube: {len(list(climate_dt.datacubes()))}
+In memory size according to objsize: {objsize.get_deep_size(climate_dt) / 2**20:.0f} MB
+""")
+```
+
+## Building your own Qubes
+
+You can do it from nested dictionaries with keys in the form "{key=value}":

 ```{code-cell} python3
 from qubed import Qube

-q = Qube.from_dict({
+q1 = Qube.from_dict({
    "class=od" : {
        "expver=0001": {"param=1":{}, "param=2":{}},
        "expver=0002": {"param=1":{}, "param=2":{}},
@ -29,31 +65,107 @@ q = Qube.from_dict({
        "expver=0002": {"param=1":{}, "param=2":{}},
    },
 })
-print(f"{q.n_leaves = }, {q.n_nodes = }")
-q
+print(f"{q1.n_leaves = }, {q1.n_nodes = }")
+q1
 ```

-Compress it:
+If someone sends you a printed qube you can convert that back to a Qube too:

 ```{code-cell} python3
-cq = q.compress()
-assert cq.n_leaves == q.n_leaves
+q2 = Qube.from_tree("""
+    root, frequency=6:00:00
+    ├── levtype=pl, param=t, levelist=850, threshold=-2/-4/-8/2/4/8
+    └── levtype=sfc
+        ├── param=10u/10v, threshold=10/15
+        ├── param=2t, threshold=273.15
+        └── param=tp, threshold=0.1/1/10/100/20/25/5/50
+""")
+q2
+```
+We would not recommend trying to write this representation by hand though.
+
+Finally, quite a flexible approach is to take the union of a series of dense datacubes:
+
+```{code-cell} python3
+q3 = Qube.from_datacube(
+    dict(
+        param="10u/10v/2d/2t/cp/msl/skt/sp/tcw/tp".split("/"),
+        threshold="*",
+        levtype="sfc",
+        frequency="6:00:00",
+    )
+) | Qube.from_datacube(
+    dict(
+        param="q/t/u/v/w/z".split("/"),
+        threshold="*",
+        levtype="pl",
+        level="50/100/150/200/250/300/400/500/600/700/850".split("/"),
+        frequency="6:00:00",
+    )
+)
+q3
+```
+
+## Operations on Qubes
+
+Going back to that first qube:
+```{code-cell} python3
+q1
+```
+
+We can compress it:
+
+```{code-cell} python3
+cq = q1.compress()
+assert cq.n_leaves == q1.n_leaves
 print(f"{cq.n_leaves = }, {cq.n_nodes = }")
 cq
 ```

-Load a larger example qube (requires source checkout):
+With the HTML representation you can click on the leaves to expand them. You can copy a path representation of a node to the clipboard by alt/option/⌥ clicking on it. You can then extract that node in code using `[]`:

 ```{code-cell} python3
-import requests
-qube_json = requests.get("https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()
-climate_dt = Qube.from_json(qube_json)
-
-# Using the html or print methods is optional but lets you specify things like the depth of the tree to display.
-print(f"{climate_dt.n_leaves = }, {climate_dt.n_nodes = }")
-climate_dt.html(depth=1) # Limit how much is open initially, click leave to see more.
+cq["class=rd,expver=0001"]
 ```

+Select a subtree:
+
+```{code-cell} python3
+cq["class", "od"]["expver", "0001"]
+```
+
+Intersect with a dense datacube:
+
+```{code-cell} python3
+dq = Qube.from_datacube({
+    "class": ["od", "rd", "cd"],
+    "expver": ["0001", "0002", "0003"],
+    "param": "2",
+})
+
+(cq & dq).print()
+```
+
+
+## Iteration
+
+Iterate over the leaves:
+
+```{code-cell} python3
+for i, identifier in enumerate(cq.leaves()):
+    print(identifier)
+    if i > 10:
+        print("...")
+        break
+```
+
+Or, if you can, it's more efficient to iterate over the datacubes:
+
+```{code-cell} python3
+list(cq.datacubes())
+```
+
+## Selection
 Select a subset of the tree:

 ```{code-cell} python3
@ -77,43 +189,85 @@ for key, values in axes.items():
 ```


-<!-- ### Set Operations
+## Set Operations
+
+The union/intersection/difference of two dense datacubes is not necessarily itself dense.

 ```{code-cell} python3
-A = Qube.from_dict({
-    "a=1/2/3" : {"b=1/2/3" : {"c=1/2/3" : {}}},
-    "a=5" : {  "b=4" : {  "c=4" : {}}}
-    })
+A = Qube.from_dict({"a=1/2/3" : {"b=i/j/k" : {}},})
+B = Qube.from_dict({"a=2/3/4" : {"b=j/k/l" : {}},})

-B = Qube.from_dict({
-    "a=1/2/3" : {"b=1/2/3" : {"c=1/2/3" : {}}},
-    "a=5" : {  "b=4" : {  "c=4" : {}}}
-    })
-
-A.print(name="A"), B.print(name="B");
-
-A | B
-``` -->
-
-<!-- ### Command Line Usage
-
-```bash 
-fdb list class=rd,expver=0001,... | qubed --from=fdblist --to=text
+A.print(), B.print();
 ```

-`--from` options include: 
-* `fdblist`
-* `json`
-* `protobuf`
-* `marslist`
-* `constraints`
+Union:

-`--to` options include:
-* `text`
-* `html`
-* `json`
-* `datacubes`
-* `constraints`
+```{code-cell} python3
+(A | B).print();
+```

-use `--input` and `--output` to specify input and output files respectively. -->
+Intersection:

+```{code-cell} python3
+(A & B).print();
+```
+
+Difference:
+
+```{code-cell} python3
+(A - B).print();
+```
+
+Symmetric Difference:
+
+```{code-cell} python3
+(A ^ B).print();
+```
+
+## Transformations
+
+`q.transform` takes a python function from one node to one or more nodes and uses this to build a new tree. This can be used for simple operations on the key or values but also to split or remove nodes. Note that you can't use it to merge nodes because it's only allowed to see one node at a time.
+
+```{code-cell} python3
+def capitalize(node): return node.replace(key = node.key.capitalize())
+climate_dt.transform(capitalize).html(depth=1)
+```
+
+## Save to disk
+
+There is currently a very simple JSON serialisation format. More compact binary serialisations are planned.
+```{code-cell} python3
+json = climate_dt.to_json()
+Qube.from_json(json) == climate_dt
+```
+
+## Advanced Selection
+
+There is currently partial support for different datatypes in addition to strings. Here we can convert datatypes by key to ints and timedeltas and then use functions as filters in select.
+
+```{code-cell} python3
+from datetime import timedelta, datetime
+def to_timedelta(t):
+    dt = datetime.strptime(t, "%H:%M:%S")
+    return timedelta(hours=dt.hour, minutes=dt.minute, seconds=dt.second)
+
+q = Qube.from_tree("""
+root, frequency=6:00:00
+├── levtype=pl, levelist=850, threshold=-2/-4/-8/2/4/8
+└── levtype=sfc
+    ├── param=10u/10v, threshold=10/15
+    ├── param=2t, threshold=273.15
+    └── param=tp, threshold=0.1/1/10/100/20/25/5/50
+""").convert_dtypes({
+    "threshold": float,
+    "levelist": int,
+    "frequency": to_timedelta,
+})
+
+r = q.select({
+        "threshold": lambda t: t > 5,
+        "frequency": lambda dt: dt > timedelta(hours = 2),
+})
+
+r
+```
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@ -0,0 +1,3 @@
+numpy
+scipy
+objsize
--- a/fiab/.gitignore
+++ b/fiab/.gitignore
@ -0,0 +1 @@
+!*.json
--- a/fiab/example_products.md
+++ b/fiab/example_products.md
@ -0,0 +1,37 @@
+
+
+
+Simplest possible product
+- one field: 2 metre temperature
+- all models that output param=2t would work
+- may also have a lead time range specified from
+
+So we could say "here are all the models with param=2t with lead times in the specified interval"
+
+quantiles
+  param:
+  float range from 0 - 100
+
+threshold:
+  "give me 2 metre temperature values that are above this threshold"
+
+
+  product requirements can be specified as a set of:
+    params: one or more params
+    levels: one or more or all
+    time:
+      - product could be specific to a particular time
+      - could require at least a months worth of data
+
+
+make some fake models that have:
+ - fewer params
+ - continuous times vs steps of 6 hours
+ -
+
+
+Could also represent what data is currently cached on disk and be able to then tell the user what stuff they can generate really fast.
+
+API want:
+  - way to get axis span like what params exist
+  -
--- a/fiab/extract.py
+++ b/fiab/extract.py
@ -0,0 +1,48 @@
+import json
+from collections import defaultdict
+
+from qubed import Qube
+
+# Load the raw anemoi metadata. The context manager closes the file handle
+# deterministically instead of leaving it to the garbage collector, and the
+# explicit encoding avoids depending on the platform's locale default.
+with open("raw_anemoi_metadata.json", encoding="utf-8") as metadata_file:
+    metadata = json.load(metadata_file)
+
+# Indices of the predicted variables: the "prognostic" and "diagnostic" output
+# index lists, concatenated in that order.
+predicted_indices = [
+    *metadata["data_indices"]["data"]["output"]["prognostic"],
+    *metadata["data_indices"]["data"]["output"]["diagnostic"],
+]
+# Map those indices onto the dataset's variable names.
+variables = metadata["dataset"]["variables"]
+variables = [variables[i] for i in predicted_indices]
+
+# print('Raw Model Variables:', variables)
+
+# Split variables between pressure and surface:
+# names without an underscore are treated as surface variables.
+surface_variables = [v for v in variables if "_" not in v]
+
+# Collect the levels for each pressure variable.
+# Names with an underscore are split as "<param>_<level>", level parsed as int.
+level_variables = defaultdict(list)
+for v in variables:
+    if "_" in v:
+        variable, level = v.split("_")
+        level_variables[variable].append(int(level))
+
+# print(level_variables)
+
+# Accumulate the model's outputs as a union of dense datacubes.
+model_tree = Qube.empty()
+
+# One datacube per pressure-level param, carrying all of its levels.
+for variable, levels in level_variables.items():
+    model_tree = model_tree | Qube.from_datacube(
+        {
+            "levtype": "pl",
+            "param": variable,
+            "level": levels,
+        }
+    )
+
+# One datacube per surface param (no level axis).
+for variable in surface_variables:
+    model_tree = model_tree | Qube.from_datacube(
+        {
+            "levtype": "sfc",
+            "param": variable,
+        }
+    )
+
+# Emit the combined qube in its JSON form on stdout.
+print(model_tree.to_json())
--- a/fiab/raw_anemoi_metadata.json
+++ b/fiab/raw_anemoi_metadata.json
--- a/fiab/structure.yaml
+++ b/fiab/structure.yaml
@ -0,0 +1,67 @@
+# Format: list of models, each model has a model_outputs field which contains a nested tree of nodes
+# Nodes have {node: name, cube: list of key value(s) pairs, children: list[nodes]}
+
+# A model entry: its outputs are a tree of nodes rooted at `root`.
+- model: surface_and_atmosphere_model
+  model_outputs:
+    - node: root
+      # Key/value pairs shared by every child node.
+      cube:
+        class: rd
+        stream: anemoi
+        expver: something
+        lead_time:
+          type: datetime
+          format: '%Y-%m-%d %H:%M:%S'
+          step: 6h
+
+
+      children:
+        - node: pressure_variables
+          other_metadata: something
+          cube:
+            param: ['q', 't', 'u', 'v', 'w', 'z']
+            level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]
+
+
+        - node: surface_variables
+          other_metadata: something
+          cube:
+            param: ['sp', 'msl', '10u', '10v', '2t', '2d', 'skt', 'tcw', 'cp', 'tp']
+
+        # Hypothetical Ocean variables
+        - node: ocean_variables
+          cube:
+            param: ["saltiness", "number of eels", "is_blue", "good_for_surfing"]
+            # Placeholder values; quoted because a leading `?` is the YAML
+            # explicit-key indicator and is parser-dependent in flow sequences.
+            ocean_levels: ["??", "??"]
+
+# Alternative List of cubes format
+# This entry must sit at column 0 like the other top-level list items;
+# indenting the `-` under the previous item's keys makes the file unparseable.
+- model: surface_and_atmosphere_model
+  model_outputs:
+    - node: root
+      # Key/value pairs shared by every child node.
+      cube:
+        class: rd
+        stream: anemoi
+        expver: something
+        lead_time:
+          type: datetime
+          format: '%Y-%m-%d %H:%M:%S'
+          step: 6h
+
+
+      children:
+        - node: pressure_variables
+          other_metadata: something
+          cube:
+            param: ['q', 't', 'u', 'v', 'w', 'z']
+            level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]
+
+
+        - node: surface_variables
+          other_metadata: something
+          cube:
+            param: ['sp', 'msl', '10u', '10v', '2t', '2d', 'skt', 'tcw', 'cp', 'tp']
+
+        # Hypothetical Ocean variables
+        - node: ocean_variables
+          cube:
+            param: ["saltiness", "number of eels", "is_blue", "good_for_surfing"]
+            # Placeholder values; quoted because a leading `?` is the YAML
+            # explicit-key indicator and is parser-dependent in flow sequences.
+            ocean_levels: ["??", "??"]
--- a/notebooks/DataCubeTree.py
+++ b/notebooks/DataCubeTree.py
@ -1,395 +0,0 @@
-import dataclasses
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-from datetime import date, datetime, timedelta
-from typing import Any, Callable, Iterable, Literal
-
-
-@dataclass(frozen=True)
-class HTML():
-    html: str
-    def _repr_html_(self):
-        return self.html
-
-@dataclass(frozen=True)
-class Values(ABC):
-    @abstractmethod
-    def summary(self) -> str:
-        pass
-    @abstractmethod
-    def __len__(self) -> int:
-        pass
-
-    @abstractmethod
-    def __contains__(self, value: Any) -> bool:
-        pass
-
-    @abstractmethod
-    def from_strings(self, values: list[str]) -> list['Values']:
-        pass
-
-@dataclass(frozen=True)
-class Enum(Values):
-    """
-    The simplest kind of key value is just a list of strings.
-    summary -> string1/string2/string....
-    """
-    values: list[Any]
-
-    def __len__(self) -> int:
-        return len(self.values)
-    def summary(self) -> str:
-        return '/'.join(sorted(self.values))
-    def __contains__(self, value: Any) -> bool:
-        return value in self.values
-    def from_strings(self, values: list[str]) -> list['Values']:
-        return [Enum(values)]
-
-@dataclass(frozen=True)
-class Range(Values, ABC):
-    dtype: str = dataclasses.field(kw_only=True)
-
-@dataclass(frozen=True)
-class DateRange(Range):
-    start: date
-    end: date
-    step: timedelta
-    dtype: Literal["date"] = dataclasses.field(kw_only=True, default="date")
-
-    @classmethod
-    def from_strings(self, values: list[str]) -> list['DateRange']:
-        dates = sorted([datetime.strptime(v, "%Y%m%d") for v in values])
-        if len(dates) < 2:
-            return [DateRange(
-                start=dates[0],
-                end=dates[0],
-                step=timedelta(days=0)
-            )]
-        
-        ranges = []
-        current_range, dates = [dates[0],], dates[1:]
-        while len(dates) > 1:
-            if dates[0] - current_range[-1] == timedelta(days=1):
-                current_range.append(dates.pop(0))
-            
-            elif len(current_range) == 1:
-                ranges.append(DateRange(
-                start=current_range[0],
-                end=current_range[0],
-                step=timedelta(days=0)
-                ))
-                current_range = [dates.pop(0),]
-
-            else:
-                ranges.append(DateRange(
-                start=current_range[0],
-                end=current_range[-1],
-                step=timedelta(days=1)
-                ))
-                current_range = [dates.pop(0),]
-        return ranges
-    
-    def __contains__(self, value: Any) -> bool:
-        v = datetime.strptime(value, "%Y%m%d").date()
-        return self.start <= v <= self.end and (v - self.start) % self.step == 0
-
-
-    def __len__(self) -> int:
-        return (self.end - self.start) // self.step
-    
-    def summary(self) -> str:
-        def fmt(d): return d.strftime("%Y%m%d")
-        if self.step == timedelta(days=0):
-            return f"{fmt(self.start)}"
-        if self.step == timedelta(days=1):
-            return f"{fmt(self.start)}/to/{fmt(self.end)}"
-        
-        return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step // timedelta(days=1)}"
-
-@dataclass(frozen=True)
-class TimeRange(Range):
-    start: int
-    end: int
-    step: int
-    dtype: Literal["time"] = dataclasses.field(kw_only=True, default="time")
-
-    @classmethod
-    def from_strings(self, values: list[str]) -> list['TimeRange']:
-        if len(values) == 0: return []
-
-        times = sorted([int(v) for v in values])
-        if len(times) < 2:
-            return [TimeRange(
-                start=times[0],
-                end=times[0],
-                step=100
-            )]
-        
-        ranges = []
-        current_range, times = [times[0],], times[1:]
-        while len(times) > 1:
-            if times[0] - current_range[-1] == 1:
-                current_range.append(times.pop(0))
-            
-            elif len(current_range) == 1:
-                ranges.append(TimeRange(
-                start=current_range[0],
-                end=current_range[0],
-                step=0
-                ))
-                current_range = [times.pop(0),]
-
-            else:
-                ranges.append(TimeRange(
-                start=current_range[0],
-                end=current_range[-1],
-                step=1
-                ))
-                current_range = [times.pop(0),]
-        return ranges
-
-    def __len__(self) -> int:
-        return (self.end - self.start) // self.step
-    
-    def summary(self) -> str:
-        def fmt(d): return f"{d:04d}"
-        if self.step == 0:
-            return f"{fmt(self.start)}"
-        return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
-    
-    def __contains__(self, value: Any) -> bool:
-        v = int(value)
-        return self.start <= v <= self.end and (v - self.start) % self.step == 0
-
-@dataclass(frozen=True)
-class IntRange(Range):
-    dtype: Literal["int"]
-    start: int
-    end: int
-    step: int
-    dtype: Literal["int"] = dataclasses.field(kw_only=True, default="int")
-
-    def __len__(self) -> int:
-        return (self.end - self.start) // self.step
-    
-    def summary(self) -> str:
-        def fmt(d): return d.strftime("%Y%m%d")
-        return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
-    
-    def __contains__(self, value: Any) -> bool:
-        v = int(value)
-        return self.start <= v <= self.end and (v - self.start) % self.step == 0
-    
-
-def values_from_json(obj) -> Values:
-    if isinstance(obj, list): 
-        return Enum(obj)
-
-    match obj["dtype"]:
-        case "date": return DateRange(**obj)
-        case "time": return TimeRange(**obj)
-        case "int": return IntRange(**obj)
-        case _: raise ValueError(f"Unknown dtype {obj['dtype']}")
-
-@dataclass(frozen=True)
-class Node:
-    key: str
-    values: Values # Must support len()
-    metadata: dict[str, str] # Applies to all children
-    payload: list[Any] # List of size product(len(n.values) for n in  ancestors(self))
-    children: list['Node']
-
-def summarize_node(node: Node) -> tuple[str, Node]:
-    """
-    Extracts a summarized representation of the node while collapsing single-child paths.
-    Returns the summary string and the last node in the chain that has multiple children.
-    """
-    summary = []
-    
-    while True:
-        values_summary = node.values.summary()
-        if len(values_summary) > 50:
-            values_summary = values_summary[:50] + "..."
-        summary.append(f"{node.key}={values_summary}")
-
-        # Move down if there's exactly one child, otherwise stop
-        if len(node.children) != 1:
-            break
-        node = node.children[0]
-
-    return ", ".join(summary), node
-
-def node_tree_to_string(node : Node, prefix : str = "", depth = None) -> Iterable[str]:
-    summary, node = summarize_node(node)
-    
-    if depth is not None and depth <= 0:
-        yield summary + " - ...\n"
-        return
-    # Special case for nodes with only a single child, this makes the printed representation more compact
-    elif len(node.children) == 1:
-        yield summary + ", "
-        yield from node_tree_to_string(node.children[0], prefix, depth = depth)
-        return
-    else:
-        yield summary + "\n"
-    
-    for index, child in enumerate(node.children):
-        connector = "└── " if index == len(node.children) - 1 else "├── "
-        yield prefix + connector
-        extension = "    " if index == len(node.children) - 1 else "│   "
-        yield from node_tree_to_string(child, prefix + extension, depth = depth - 1 if depth is not None else None)
-
-def node_tree_to_html(node : Node, prefix : str = "", depth = 1, connector = "") -> Iterable[str]:
-    summary, node = summarize_node(node)
-    
-    if len(node.children) == 0:
-        yield f'<span class="leaf">{connector}{summary}</span>'
-        return
-    else:
-        open = "open" if depth > 0 else ""
-        yield f"<details {open}><summary>{connector}{summary}</summary>"
-
-    for index, child in enumerate(node.children):
-        connector = "└── " if index == len(node.children) - 1 else "├── "
-        extension = "    " if index == len(node.children) - 1 else "│   "
-        yield from node_tree_to_html(child, prefix + extension, depth = depth - 1, connector = prefix+connector)
-    yield "</details>"
-
-@dataclass(frozen=True)
-class CompressedTree:
-    root: Node
-
-    @classmethod
-    def from_json(cls, json: dict) -> 'CompressedTree':
-        def from_json(json: dict) -> Node:
-            return Node(
-                key=json["key"],
-                values=values_from_json(json["values"]),
-                metadata=json["metadata"] if "metadata" in json else {},
-                payload=json["payload"] if "payload" in json else [],
-                children=[from_json(c) for c in json["children"]]
-            )
-        return CompressedTree(root=from_json(json))
-    
-    def __str__(self):
-        return "".join(node_tree_to_string(node=self.root))
-    
-    def html(self, depth = 2) -> HTML:
-        return HTML(self._repr_html_(depth = depth))
-    
-    def _repr_html_(self, depth = 2):
-        css = """
-        <style>
-        .qubed-tree-view {
-            font-family: monospace;
-            white-space: pre;
-        }
-        .qubed-tree-view details {
-            # display: inline;
-            margin-left: 0;
-        }
-        .qubed-tree-view summary {
-            list-style: none;
-            cursor: pointer;
-            text-overflow: ellipsis;
-            overflow: hidden;
-            text-wrap: nowrap;
-            display: block;
-        }
-
-        .qubed-tree-view .leaf {
-            text-overflow: ellipsis;
-            overflow: hidden;
-            text-wrap: nowrap;
-            display: block;
-        }
-
-        .qubed-tree-view summary:hover,span.leaf:hover {
-            background-color: #f0f0f0;
-        }
-        .qubed-tree-view details > summary::after {
-            content: ' ';
-        }
-        .qubed-tree-view details:not([open]) > summary::after {
-            content: " ▼";
-        }
-        </style>
-
-        """
-        nodes = "".join(cc for c in self.root.children for cc in node_tree_to_html(node=c, depth=depth))
-        return f"{css}<pre class='qubed-tree-view'>{nodes}</pre>"
-    
-    def print(self, depth = None):
-        print("".join(cc for c in self.root.children for cc in node_tree_to_string(node=c, depth = depth)))
-
-    def transform(self, func: Callable[[Node], Node]) -> 'CompressedTree':
-        "Call a function on every node of the tree, any changes to the children of a node will be ignored."
-        def transform(node: Node) -> Node:
-            new_node = func(node)
-            return dataclasses.replace(new_node, children = [transform(c) for c in node.children])
-        return CompressedTree(root=transform(self.root))
-
-    def guess_datatypes(self) -> 'CompressedTree':
-        def guess_datatypes(node: Node) -> list[Node]:
-            # Try to convert enum values into more structured types
-            children = [cc for c in node.children for cc in guess_datatypes(c)]
-
-            if isinstance(node.values, Enum):
-                match node.key:
-                    case "time": range_class = TimeRange
-                    case "date": range_class = DateRange
-                    case _: range_class = None
-
-                if range_class is not None:
-                    return [
-                        dataclasses.replace(node, values = range, children = children)
-                        for range in range_class.from_strings(node.values.values)
-                    ]
-            return [dataclasses.replace(node, children = children)]
-
-        children = [cc for c in self.root.children for cc in guess_datatypes(c)]
-        return CompressedTree(root=dataclasses.replace(self.root, children = children))
-
-    
-    def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed") -> 'CompressedTree':
-        # make all values lists
-        selection = {k : v if isinstance(v, list) else [v] for k,v in selection.items()}
-
-        def not_none(xs): return [x for x in xs if x is not None]
-
-        def select(node: Node) -> Node | None: 
-            # Check if the key is specified in the selection
-            if node.key not in selection: 
-                if mode == "strict":
-                    return None
-                return dataclasses.replace(node, children = not_none(select(c) for c in node.children))
-            
-            # If the key is specified, check if any of the values match
-            values = Enum([ c for c in selection[node.key] if c in node.values])
-
-            if not values: 
-                return None 
-            
-            return dataclasses.replace(node, values = values, children = not_none(select(c) for c in node.children))
-            
-        return CompressedTree(root=dataclasses.replace(self.root, children = not_none(select(c) for c in self.root.children)))
-    
-    def to_list_of_cubes(self):
-        def to_list_of_cubes(node: Node) -> list[list[Node]]:
-            return [[node] + sub_cube for c in node.children for sub_cube in to_list_of_cubes(c)]
-
-        return to_list_of_cubes(self.root)
-
-    def info(self):
-        cubes = self.to_list_of_cubes()
-        print(f"Number of distinct paths: {len(cubes)}")
-
-        
-
-        
-# What should the interace look like?
-
-# tree = CompressedTree.from_json(...)
-# tree = CompressedTree.from_protobuf(...)
-
-# tree.print(depth = 5) # Prints a nice tree representation
--- a/notebooks/test.ipynb
+++ b/notebooks/test.ipynb
@ -1,655 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "d4ca1d75-6dec-48d3-a448-d46bb0d65602",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%load_ext autoreload\n",
-    "%autoreload 2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "d9966f80-7bd3-4404-920e-c8262f304a02",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "        <style>\n",
-       "        .qubed-tree-view {\n",
-       "            font-family: monospace;\n",
-       "            white-space: pre;\n",
-       "        }\n",
-       "        .qubed-tree-view details {\n",
-       "            # display: inline;\n",
-       "            margin-left: 0;\n",
-       "        }\n",
-       "        .qubed-tree-view summary {\n",
-       "            list-style: none;\n",
-       "            cursor: pointer;\n",
-       "            text-overflow: ellipsis;\n",
-       "            overflow: hidden;\n",
-       "            text-wrap: nowrap;\n",
-       "            display: block;\n",
-       "        }\n",
-       "\n",
-       "        .qubed-tree-view .leaf {\n",
-       "            text-overflow: ellipsis;\n",
-       "            overflow: hidden;\n",
-       "            text-wrap: nowrap;\n",
-       "            display: block;\n",
-       "        }\n",
-       "\n",
-       "        .qubed-tree-view summary:hover,span.leaf:hover {\n",
-       "            background-color: #f0f0f0;\n",
-       "        }\n",
-       "        .qubed-tree-view details > summary::after {\n",
-       "            content: ' ▲';\n",
-       "        }\n",
-       "        .qubed-tree-view details:not([open]) > summary::after {\n",
-       "            content: \" ▼\";\n",
-       "        }\n",
-       "        </style>\n",
-       "\n",
-       "        <pre class='qubed-tree-view'><details open><summary>root</summary><details open><summary>├── class=od</summary><details ><summary>│   ├── expver=0001/0002/0003</summary><span class=\"leaf\">│   │   ├── param=1</span><span class=\"leaf\">│   │   └── param=2</span></details><details ><summary>│   └── expver=0003/0004</summary><span class=\"leaf\">│       ├── param=1</span><span class=\"leaf\">│       └── param=2</span></details></details><details open><summary>└── class=rd</summary><details ><summary>    ├── expver=0001/0002</summary><span class=\"leaf\">    │   ├── param=1</span><span class=\"leaf\">    │   └── param=2</span></details><details ><summary>    └── expver=0003/0004</summary><span class=\"leaf\">        ├── param=1</span><span class=\"leaf\">        └── param=2</span></details></details></details></pre>"
-      ],
-      "text/plain": [
-       "Qube(data=NodeData(key='root', values=Enum(values=('root',)), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='class', values=Enum(values=('od',)), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='expver', values=Enum(values=('0001', '0002', '0003')), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='param', values=Enum(values=('1',)), metadata=frozendict.frozendict({})), children=()), Qube(data=NodeData(key='param', values=Enum(values=('2',)), metadata=frozendict.frozendict({})), children=()))), Qube(data=NodeData(key='expver', values=Enum(values=('0003', '0004')), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='param', values=Enum(values=('1',)), metadata=frozendict.frozendict({})), children=()), Qube(data=NodeData(key='param', values=Enum(values=('2',)), metadata=frozendict.frozendict({})), children=()))))), Qube(data=NodeData(key='class', values=Enum(values=('rd',)), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='expver', values=Enum(values=('0001', '0002')), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='param', values=Enum(values=('1',)), metadata=frozendict.frozendict({})), children=()), Qube(data=NodeData(key='param', values=Enum(values=('2',)), metadata=frozendict.frozendict({})), children=()))), Qube(data=NodeData(key='expver', values=Enum(values=('0003', '0004')), metadata=frozendict.frozendict({})), children=(Qube(data=NodeData(key='param', values=Enum(values=('1',)), metadata=frozendict.frozendict({})), children=()), Qube(data=NodeData(key='param', values=Enum(values=('2',)), metadata=frozendict.frozendict({})), children=())))))))"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from qubed import Qube\n",
-    "\n",
-    "q = Qube.from_dict({\n",
-    "    \"class=od\" : {\n",
-    "        \"expver=0001/0002/0003\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "    \"class=rd\" : {\n",
-    "        \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "})\n",
-    "\n",
-    "q"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "id": "261f32c8-74c6-4cc9-9000-bf9bf9ff3456",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "root\n",
-      "├── class=od\n",
-      "│   ├── expver=0001\n",
-      "│   │   ├── param=1\n",
-      "│   │   └── param=2\n",
-      "│   └── expver=0002\n",
-      "│       ├── param=1\n",
-      "│       └── param=2\n",
-      "└── class=rd\n",
-      "    ├── expver=0001\n",
-      "    │   ├── param=1\n",
-      "    │   ├── param=2\n",
-      "    │   └── param=3\n",
-      "    └── expver=0002\n",
-      "        ├── param=1\n",
-      "        └── param=2\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from qubed import Qube\n",
-    "\n",
-    "q = Qube.from_dict({\n",
-    "    \"class=od\" : {\n",
-    "        \"expver=0001\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "    \"class=rd\" : {\n",
-    "        \"expver=0001\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
-    "        \"expver=0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "})\n",
-    "\n",
-    "q.print()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "id": "7d1b353c-44a6-45be-bd02-2116771ed84d",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "root\n",
-      "├── class=od, expver=0001/0002, param=1/2\n",
-      "└── class=rd\n",
-      "    ├── expver=0001, param=1/2/3\n",
-      "    └── expver=0002, param=1/2\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "q.compress().print()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "73193a0d-2d0d-4d64-9f3c-fc5f5dfe7c5e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pathlib import Path\n",
-    "\n",
-    "from tree_traverser.DataCubeTree import Tree\n",
-    "from tree_traverser.CompressedDataCubeTree import CompressedTree\n",
-    "\n",
-    "t1 = Tree.from_dict({\n",
-    "    \"class=od\" : {\n",
-    "        \"expver=0001/0002/0003\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "    \"class=rd\" : {\n",
-    "        \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "    # \"class=cd\" : {\n",
-    "    #     \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    #     \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    #     \"expver=0005\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
-    "    # }\n",
-    "})\n",
-    "\n",
-    "print(str(t1))\n",
-    "\n",
-    "ct = CompressedTree.from_tree(t1).compress()\n",
-    "print(str(ct))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c025d24e-e769-499e-90c3-efc836c4b399",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def tree_from_dict(d):\n",
-    "    if not d: return {}\n",
-    "    k = next(iter(d))\n",
-    "    v = d.pop(k)\n",
-    "    if not isinstance(v, list): v = [v,]\n",
-    "    return {f\"{k}={'/'.join(v)}\" : tree_from_dict(d)}\n",
-    "\n",
-    "t1 = Tree.from_dict(tree_from_dict({'class': ['d1', 'd2'],\n",
-    " 'dataset': 'climate-dt',\n",
-    " 'generation': ['1','2','3'],\n",
-    " 'model': 'icon',\n",
-    " 'date': ['20241102','20241103'],\n",
-    " 'resolution': ['high','low'],\n",
-    " 'time': ['0000', '0600', '1200', '1800'],\n",
-    "}))\n",
-    "print(str(t1))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c85ba0e3-6754-4227-8dd5-ccca57714420",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "t1 = Tree.from_dict({\n",
-    "    \"class=od\" : {\n",
-    "        \"expver=0001\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "    \"class=rd\" : {\n",
-    "        \"expver=0001\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
-    "        \"expver=0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "})\n",
-    "\n",
-    "t1.print()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "17145fb0-82ec-4716-9eca-83fdae4fbcdf",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ct = CompressedTree.from_tree(t1).compress()\n",
-    "ct.print()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f70e9e53-326a-4158-bf62-339fd765e5f8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pathlib import Path\n",
-    "\n",
-    "import orjson as json\n",
-    "from tree_traverser.DataCubeTree import Tree\n",
-    "# from tree_traverser.CompressedDataCubeTree import CompressedTree\n",
-    "\n",
-    "t1 = Tree.from_dict({\n",
-    "    \"class=od\" : {\n",
-    "        \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "    \"class=rd\" : {\n",
-    "        \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "    \"class=cd\" : {\n",
-    "        \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0005\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
-    "    }\n",
-    "})\n",
-    "\n",
-    "t1.html(depth=5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4f831008-7e36-4c51-9cf8-b10af69b35c0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pathlib import Path\n",
-    "\n",
-    "import orjson as json\n",
-    "from tree_traverser.DataCubeTree import Tree\n",
-    "from tree_traverser.CompressedDataCubeTree import CompressedTree\n",
-    "\n",
-    "t1 = Tree.from_dict({\n",
-    "    \"class=od\" : {\n",
-    "        \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "    \"class=rd\" : {\n",
-    "        \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "    \"class=cd\" : {\n",
-    "        \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0005\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
-    "    }\n",
-    "})\n",
-    "\n",
-    "t1.html(depth=5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "49d0f344-34ad-476b-bb27-7441b9d8cddb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "t1 = Tree.empty()\n",
-    "\n",
-    "t1 = t1.insert(dict(a = [1,2,3], b = [2,3,4]))\n",
-    "t1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cdab9360-f465-4570-9670-29a9567dc962",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "t1 = t1.insert(dict(a = [4,5,6], b = [2,3,4]))\n",
-    "t1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4189ea19-793d-46e0-bbd6-550a14c9626c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "t1 = t1.insert(dict(a = [1], b = [5]))\n",
-    "t1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d1564a02-9d47-43af-990d-54493a76e029",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ct = CompressedTree.from_tree(t1).compress()\n",
-    "ct.html(debug = False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f50d6f52-b417-4dae-8b94-a4fe5535d0ca",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "t1 = Tree.from_dict({\n",
-    "    \"class=od\" : {\n",
-    "        \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "    \"class=rd\" : {\n",
-    "        \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "    },\n",
-    "    \"class=cd\" : {\n",
-    "        \"expver=0001/0002\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0003/0004\": {\"param=1\":{}, \"param=2\":{}},\n",
-    "        \"expver=0005\": {\"param=1\":{}, \"param=2\":{}, \"param=3\":{}},\n",
-    "    }\n",
-    "})\n",
-    "\n",
-    "t1.html(depth=5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "af63c581-aa28-4f35-be7f-0044ccdce671",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ct = CompressedTree.from_tree(t1).compress()\n",
-    "ct.html(debug = False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "15ec656b-d7fa-45f9-97a4-0615ee8104ee",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ct = CompressedTree.from_tree(t1).compress()\n",
-    "print(str(ct))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "eeb388cd-d9e4-4959-97fa-c8874abf68d9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ct.lookup({\n",
-    "    \"class\" : \"rd\"\n",
-    "})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "52a6a4de-ca76-48a0-8931-ea3f0569d3a4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for k in sorted(ct.cache):\n",
-    "    node = ct.cache[k]\n",
-    "    print(k, ct.cache[k].summary())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ea5ee804-740b-435d-9455-b22ac6ba07db",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with open(\"keys.txt\", \"r\") as f:\n",
-    "    for line in f.readlines():\n",
-    "        print(line)\n",
-    "        j = json.loads(line.replace(\"'\", '\"'))\n",
-    "        path = j[\"path\"]\n",
-    "        keys = j[\"keys\"]\n",
-    "        offset = j[\"offset\"]\n",
-    "        \n",
-    "        print(path, keys)\n",
-    "        break"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cf2d2099-98d4-4e41-9fb9-1f2f4cbe3bd9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ct[\"class\", \"od\"][\"expver\", \"0001\"].root._children, ct[\"class\", \"od\"][\"expver\", \"0003\"].root._children, "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9cb2619b-a0aa-4a1e-970b-04bf5da8a784",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from tree_traverser.trie import Trie\n",
-    "\n",
-    "t = Trie()\n",
-    "i = t.insert(\"/data/prod_1/fdb/d1:\")\n",
-    "t"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c9842fb8-2bef-48fc-bd59-4526087c2cf7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "t.lookup_by_id(i)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6e4ef453-b20f-489f-9ef9-d197626fc71e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data_path = Path(\"/Users/math/git/rust/qubed/config/climate-dt/new_format.json\")\n",
-    "with data_path.open(\"r\") as f:\n",
-    "    climate_dt = Tree.from_json(json.loads(f.read()))\n",
-    "\n",
-    "# climate_dt = climate_dt.guess_datatypes()\n",
-    "\n",
-    "filtered = climate_dt.select({\n",
-    "  # \"activity\": \"scenariomip\",\n",
-    "  # \"date\": \"20201103\",\n",
-    "   # \"model\": \"ifs-nemo\",\n",
-    "    # \"levtype\": \"sfc\",\n",
-    "  #   \"param\": \"129\",\n",
-    "    # \"stream\": \"clte\",\n",
-    "})\n",
-    "\n",
-    "filtered.html(depth = 1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "684c2520-bcb6-4e1c-acb3-e116a742f3d1",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "61a54328-e64c-4f39-9123-3d9890316678",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "user_request = {\n",
-    "  \"activity\": \"scenariomip\",\n",
-    "  \"datetime\": {\"from\": \"2020-11-03:09:00\", \"to\": \"2021-11-03:06:00\"},\n",
-    "  \"model\": \"ifs-nemo\",\n",
-    "  \"levtype\": \"sfc\",\n",
-    "    ...\n",
-    "}\n",
-    "\n",
-    "# Load from disk, from the api, from somewhere\n",
-    "# Has information about grids and FDB URIs\n",
-    "climate_dt_tree = Tree.load(...) \n",
-    "\n",
-    "# Do some preliminary filtering on the tree, eliminating \n",
-    "# If the FDB URI info is too heavy we could choose to only load it after filtering\n",
-    "filtered_tree = climate_dt_tree.select(user_request)\n",
-    "\n",
-    "# Polytope takes the user request and the now filtered tree\n",
-    "# uses the grid information and other info to decide what indices it wants from grib jump for each leaf of the tree\n",
-    "# encodes this using the payload mechanism of the tree\n",
-    "index_tree_for_gribjump = polytope.do(user_request, filtered_tree)\n",
-    "\n",
-    "# This tree has the data associated with each leaf attached to it\n",
-    "data_tree = send_to_gribjump(index_tree_for_gribjump)\n",
-    "\n",
-    "output = polytope.process(data_tree)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "20b05adb-b760-4e6e-9ccf-8e2531cd8d5d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filtered.print()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b769f3d7-d0c3-42da-8b6d-64f00bc63dd9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import unicodedata\n",
-    "\n",
-    "with open(\"/Users/math/Downloads/cads-forms-reanalysis-prod@da54febf9c6/reanalysis-era5-single-levels/gecko-config/mars.list\", \"r\") as f:\n",
-    "    levels = []\n",
-    "    for i, l in enumerate(f.readlines()):\n",
-    "        level = len(l)-len(l.lstrip(' '))\n",
-    "        l = l.lstrip().rstrip()\n",
-    "        if level == 2:\n",
-    "            assert len(levels) == 2\n",
-    "            kvs = [kv for lv in levels + [l] for kv in lv.split(\",\")]\n",
-    "            d = {}\n",
-    "            for kv in kvs:\n",
-    "                key, values = kv.split(\"=\")\n",
-    "                values = values.split(\"/\")\n",
-    "                assert key not in d\n",
-    "                d[key] = values\n",
-    "            print(d)\n",
-    "        else:\n",
-    "            levels = levels[:level]\n",
-    "            levels.append(l)\n",
-    "        if i > 10: break"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d62dfb70-6fd8-4096-9c01-2210bb010197",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "r = dict(a = 1, b = 2, c = 3)\n",
-    "while r:\n",
-    "    print(r.popitem())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "60929956-bf0b-4f2d-95a4-a6b9f1708e73",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "s = \"{class=d1,dataset=climate-dt,activity=scenariomip,experiment=ssp3-7.0,generation=1,model=icon,realization=1,expver=0001,stream=clte,date=20241102}{resolution=high,type=fc,levtype=sfc}{time=0000,param=168}\"\n",
-    "\n",
-    "dict(l.split(\"=\", maxsplit=1) for l in s.replace(\"{\", \" \").replace(\"}\", \" \").replace(\",\", \" \").split())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "152612d2-14f3-4639-8bc8-e4de7fad4ef4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "str.split?"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8b21a006-37ef-49ac-81ef-97cc9e70c63e",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python [conda env:micromamba-catalogs]",
-   "language": "python",
-   "name": "conda-env-micromamba-catalogs-py"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [build-system]
-requires = ["setuptools", "wheel"]
-build-backend = "setuptools.build_meta"
+requires = ["maturin>=1.0,<2.0"]
+build-backend = "maturin"

 [project]
 name = "qubed"
@ -18,6 +18,13 @@ requires-python = ">= 3.11"
 dynamic = ["version"]
 dependencies = [
    "frozendict",
+    "numpy",
+    "protobuf",
+
+    # CLI
+    "rich",
+    "click",
+    "psutil"
 ]

 # Because this is a mixed rust/python project the structure is src/python/qubed rather than the more typical src/qubed
@ -35,7 +42,6 @@ features = ["pyo3/extension-module"]


 [project.optional-dependencies]
-rust-backend = ["maturin>=1.7,<2.0", "pyo3"]

 stac_server = [
    "fastapi",
--- a/scripts/build_images.sh
+++ b/scripts/build_images.sh
@ -7,9 +7,3 @@ sudo docker build \
    --target=stac_server \
    .
 sudo docker push eccr.ecmwf.int/qubed/stac_server:latest
-
-sudo docker build \
-    --tag=eccr.ecmwf.int/qubed/web_query_builder:latest \
-    --target=web_query_builder \
-    .
-sudo docker push eccr.ecmwf.int/qubed/web_query_builder:latest
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@ -1 +1,2 @@
-helm upgrade stac-server chart -n stac-server
+# helm install qubed chart -n qubed
+helm upgrade qubed chart -n qubed
--- a/scripts/load_redis.py
+++ b/scripts/load_redis.py
@ -1,19 +0,0 @@
-#! .venv/bin/python
-
-import redis
-import yaml
-import json
-
-print("Opening redis connection")
-r = redis.Redis(host="redis", port=6379, db=0)
-
-print("Loading data from local files")
-with open("config/climate-dt/compressed_tree.json") as f:
-    compressed_catalog = json.load(f)
-
-with open("config/climate-dt/language.yaml") as f:
-    mars_language = yaml.safe_load(f)["_field"]
-
-print("Storing data in redis")
-r.set('compressed_catalog', json.dumps(compressed_catalog))
-r.set('mars_language', json.dumps(mars_language))
--- a/scripts/logs.sh
+++ b/scripts/logs.sh
@ -0,0 +1 @@
+kubectl -n qubed logs deployment/stac-server
--- a/scripts/restart.sh
+++ b/scripts/restart.sh
@ -1,3 +1,2 @@
 # kubectl rollout restart deployment/redis
-kubectl rollout restart deployment/web-query-builder
-kubectl rollout restart deployment/stac-server
+kubectl -n qubed rollout restart deployment/stac-server
--- a/scripts/setup.sh
+++ b/scripts/setup.sh
@ -1,3 +0,0 @@
-python3 -m venv .venv
-source .venv/bin/activate
-pip install pyyaml redis
--- a/src/python/qubed/Qube.py
+++ b/src/python/qubed/Qube.py
@ -1,151 +1,596 @@
-import dataclasses
-from collections import defaultdict
-from dataclasses import dataclass
-from functools import cached_property
-from typing import Any, Callable, Literal
+# This causes python types to be evaluated later,
+# allowing you to reference types like Qube inside the definition of the Qube class
+# without having to do "Qube"
+from __future__ import annotations

+import dataclasses
+import functools
+import json
+from collections import defaultdict
+from collections.abc import Callable
+from dataclasses import dataclass, field
+from functools import cached_property
+from pathlib import Path
+from typing import Any, Iterable, Iterator, Literal, Mapping, Self, Sequence
+
+import numpy as np
 from frozendict import frozendict

 from . import set_operations
-from .node_types import NodeData, RootNodeData
-from .tree_formatters import HTML, node_tree_to_html, node_tree_to_string
-from .value_types import QEnum, Values, values_from_json
+from .metadata import from_nodes
+from .protobuf.adapters import proto_to_qube, qube_to_proto
+from .tree_formatters import (
+    HTML,
+    _display,
+    node_tree_to_html,
+    node_tree_to_string,
+)
+from .value_types import (
+    QEnum,
+    ValueGroup,
+    WildcardGroup,
+    values_from_json,
+)


-@dataclass(frozen=True, eq=True, order=True)
-class Qube:
-    data: NodeData
-    children: tuple['Qube', ...]
+@dataclass
+class AxisInfo:
+    key: str
+    type: Any
+    depths: set[int]
+    values: set

-    @property
-    def key(self) -> str:
-        return self.data.key
+    def combine(self, other: Self):
+        """
+        Merge `other` into this AxisInfo in place.
+
+        The key and type are overwritten with those of `other`, while the
+        depths and values sets are extended with the entries from `other`.
+        Nothing is returned.
+        """
+        self.key = other.key
+        self.type = other.type
+        self.depths.update(other.depths)
+        self.values.update(other.values)
+        # print(f"combining {self} and {other} getting {result}")

-    @property
-    def values(self) -> Values:
-        return self.data.values
+    def to_json(self):
+        return {
+            "key": self.key,
+            "type": self.type.__name__,
+            "values": list(self.values),
+            "depths": list(self.depths),
+        }

-    @property
-    def metadata(self) -> frozendict[str, Any]:
-        return self.data.metadata

+@dataclass(frozen=True, eq=True, order=True, unsafe_hash=True)
+class QubeNamedRoot:
+    "Helper class to print a custom root name"
+
+    key: str
+    children: tuple[Qube, ...] = ()

    def summary(self) -> str:
-        return self.data.summary()
+        return self.key
+
+
+@dataclass(frozen=False, eq=True, order=True, unsafe_hash=True)
+class Qube:
+    key: str
+    values: ValueGroup
+    metadata: frozendict[str, np.ndarray] = field(
+        default_factory=lambda: frozendict({}), compare=False
+    )
+    children: tuple[Qube, ...] = ()
+    is_root: bool = False
+    is_leaf: bool = False
+    depth: int = field(default=0, compare=False)
+    shape: tuple[int, ...] = field(default=(), compare=False)

    @classmethod
-    def make(cls, key : str, values : Values, children, **kwargs) -> 'Qube':
+    def make_node(
+        cls,
+        key: str,
+        values: Iterable | QEnum | WildcardGroup,
+        children: Iterable[Qube],
+        metadata: Mapping[str, np.ndarray] = {},
+        is_root: bool = False,
+        is_leaf: bool | None = None,
+    ) -> Qube:
+        if isinstance(values, ValueGroup):
+            values = values
+        else:
+            values = QEnum(values)
+
+        if not isinstance(values, WildcardGroup) and not is_root:
+            assert len(values) > 0, "Nodes must have at least one value"
+
+        children = tuple(sorted(children, key=lambda n: ((n.key, n.values.min()))))
+
        return cls(
-            data = NodeData(key, values,  metadata = kwargs.get("metadata", frozendict())
-            ),
-            children = tuple(sorted(children)),
+            key,
+            values=values,
+            children=children,
+            metadata=frozendict(metadata),
+            is_root=is_root,
+            is_leaf=(not len(children)) if is_leaf is None else is_leaf,
        )

+    @classmethod
+    def make_root(cls, children: Iterable[Qube], metadata={}) -> Qube:
+        """
+        Build a root node above `children`.
+
+        The `depth` and `shape` attributes of every descendant are updated in
+        place (direct children get depth 1; shape starts from (1,) and gains one
+        entry, the number of values, per level) before the root itself is
+        assembled by `make_node`.
+        """
+        # Materialise first: `children` is walked here and again inside make_node,
+        # so a one-shot iterator (e.g. a generator) would otherwise arrive empty.
+        children = tuple(children)
+
+        def update_depth_shape(children, depth, shape):
+            for child in children:
+                child.depth = depth + 1
+                child.shape = shape + (len(child.values),)
+                update_depth_shape(child.children, child.depth, child.shape)
+
+        update_depth_shape(children, depth=0, shape=(1,))
+
+        return cls.make_node(
+            "root",
+            values=QEnum(("root",)),
+            children=children,
+            metadata=metadata,
+            is_root=True,
+        )
+
+    def replace(self, **kwargs) -> Qube:
+        return dataclasses.replace(self, **kwargs)
+
+    def summary(self) -> str:
+        if self.is_root:
+            return self.key
+        return f"{self.key}={self.values.summary()}" if self.key != "root" else "root"

    @classmethod
-    def from_json(cls, json: dict) -> 'Qube':
-        def from_json(json: dict) -> Qube:
-            return Qube.make(
+    def load(cls, path: str | Path) -> Qube:
+        with open(path, "r") as f:
+            return Qube.from_json(json.load(f))
+
+    @classmethod
+    def from_datacube(cls, datacube: Mapping[str, str | Sequence[str]]) -> Qube:
+        key_vals = list(datacube.items())[::-1]
+
+        children: list[Qube] = []
+        for key, values in key_vals:
+            values_group: ValueGroup
+            if values == "*":
+                values_group = WildcardGroup()
+            elif isinstance(values, list):
+                values_group = QEnum(values)
+            else:
+                values_group = QEnum([values])
+
+            children = [cls.make_node(key, values_group, children)]
+
+        return cls.make_root(children)
+
+    @classmethod
+    def from_json(cls, json: dict) -> Qube:
+        def from_json(json: dict, depth=0) -> Qube:
+            return Qube.make_node(
                key=json["key"],
                values=values_from_json(json["values"]),
-                metadata=json["metadata"] if "metadata" in json else {},
-                children=tuple(from_json(c) for c in json["children"])
+                metadata=frozendict(json["metadata"]) if "metadata" in json else {},
+                children=(from_json(c, depth + 1) for c in json["children"]),
+                is_root=(depth == 0),
            )
+
        return from_json(json)

    @classmethod
-    def from_dict(cls, d: dict) -> 'Qube':
-        def from_dict(d: dict) -> tuple[Qube, ...]:
-            return tuple(Qube.make(
-                key=k.split("=")[0],
-                values=QEnum((k.split("=")[1].split("/"))),
-                children=from_dict(children)
-            ) for k, children in d.items())
+    def from_nodes(cls, nodes: dict[str, dict], add_root: bool = True):
+        return from_nodes(cls, nodes, add_root)

-        return Qube.make(key = "root",
-                              values=QEnum(("root",)),
-                              children = from_dict(d))
+    def to_json(self) -> dict:
+        def to_json(node: Qube) -> dict:
+            return {
+                "key": node.key,
+                "values": node.values.to_json(),
+                "metadata": dict(node.metadata),
+                "children": [to_json(c) for c in node.children],
+            }
+
+        return to_json(self)

    @classmethod
-    def empty(cls) -> 'Qube':
-        return cls.make("root", QEnum(("root",)), [])
+    def from_dict(cls, d: dict) -> Qube:
+        """
+        Build a Qube from nested dictionaries keyed by "key=v1/v2/..." strings.
+
+        A value list of exactly "*" becomes a WildcardGroup. Children equal to
+        {"...": {}} mark a node whose leaves are unknown: it is created with no
+        children and is_leaf=False.
+        """
+        def from_dict(d: dict) -> Iterator[Qube]:
+            for k, children in d.items():
+                key, values = k.split("=")
+                values = values.split("/")
+                # children == {"..." : {}}
+                # is a special case to represent trees with leaves we don't know about
+                if frozendict(children) == frozendict({"...": {}}):
+                    yield Qube.make_node(
+                        key=key,
+                        values=values,
+                        children={},
+                        is_leaf=False,
+                    )
+                    # Without this the node would be yielded a second time below and
+                    # the "..." placeholder parsed as a key, failing on split("=")
+                    continue

+                # Special case for Wildcard values
+                if values == ["*"]:
+                    values = WildcardGroup()
+                else:
+                    values = QEnum(values)

-    def __str__(self, depth = None, name = None) -> str:
-        node = dataclasses.replace(self, data = RootNodeData(key = name, values=self.values, metadata=self.metadata)) if name is not None else self
-        return "".join(node_tree_to_string(node=node, depth = depth))
+                yield Qube.make_node(
+                    key=key,
+                    values=values,
+                    children=from_dict(children),
+                )
+
+        return Qube.make_root(list(from_dict(d)))
+
+    def to_dict(self) -> dict:
+        """
+        Convert the children of this node into nested dictionaries whose keys are
+        "key=v1,v2,..." strings; this node's own key and values are not included.
+        """
+        def to_dict(q: Qube) -> tuple[str, dict]:
+            # NOTE(review): values are joined with "," here, whereas from_dict splits
+            # them on "/" - confirm whether a to_dict/from_dict round trip is intended.
+            key = f"{q.key}={','.join(str(v) for v in q.values)}"
+            return key, dict(to_dict(c) for c in q.children)
+
+        return to_dict(self)[1]
+
+    @classmethod
+    def from_protobuf(cls, msg: bytes) -> Qube:
+        return proto_to_qube(cls, msg)
+
+    def to_protobuf(self) -> bytes:
+        return qube_to_proto(self)
+
+    @classmethod
+    def from_tree(cls, tree_str):
+        """
+        Parse a text tree drawn with the characters " │├└─" into a Qube.
+
+        Blank lines are skipped. The indent level of a line is the number of
+        leading tree characters divided by 4, measured relative to the first
+        non-blank line. Comma separated entries on one line become a chain of
+        nested keys. The resulting nested dict is handed to `from_dict`.
+
+        Raises ValueError if a key is repeated under the same parent, or if a
+        further top level line appears once the root is non-empty.
+        """
+        lines = tree_str.splitlines()
+        stack = []
+        root = {}
+
+        initial_indent = None
+        for line in lines:
+            if not line.strip():
+                continue
+            # Remove tree characters and measure indent level
+            stripped = line.lstrip(" │├└─")
+            indent = (len(line) - len(stripped)) // 4
+            if initial_indent is None:
+                initial_indent = indent
+            indent = indent - initial_indent
+
+            # Split multiple key=value parts into nested structure
+            # `bottom` stays bound to the innermost (empty) dict so that deeper
+            # lines can later be attached beneath the last key on this line
+            keys = [item.strip() for item in stripped.split(",")]
+            current = bottom = {}
+            for key in reversed(keys):
+                current = {key: current}
+
+            # Adjust the stack to current indent level
+            # print(len(stack), stack)
+            while len(stack) > indent:
+                stack.pop()
+
+            if stack:
+                # Add to the dictionary at current stack level
+                parent = stack[-1]
+                key = list(current.keys())[0]
+                if key in parent:
+                    raise ValueError(
+                        f"This function doesn't yet support reading in uncompressed trees, repeated key is {key}"
+                    )
+                parent[key] = current[key]
+            else:
+                # Top level
+                key = list(current.keys())[0]
+                if root:
+                    raise ValueError(
+                        f"This function doesn't yet support reading in uncompressed trees, repeated key is {key}"
+                    )
+                root = current[key]
+
+            # Push to the stack
+            stack.append(bottom)
+
+        return cls.from_dict(root)
+
+    @classmethod
+    def empty(cls) -> Qube:
+        return Qube.make_root([])
+
+    def __str_helper__(self, depth=None, name=None) -> str:
+        """
+        Render this node as a text tree with a single trailing newline removed.
+
+        `depth` is forwarded to node_tree_to_string; when `name` is given it
+        replaces this node's key in the rendered output.
+        """
+        node = self if name is None else self.replace(key=name)
+        out = "".join(node_tree_to_string(node=node, depth=depth))
+        # removesuffix is a no-op on an empty string, where out[-1] would raise IndexError
+        return out.removesuffix("\n")
+
+    def __str__(self):
+        return self.__str_helper__()
+
+    def __repr__(self):
+        return f"Qube({self.__str_helper__()})"

    def print(self, depth=None, name: str | None = None):
-        print(self.__str__(depth = depth, name = name))
+        print(self.__str_helper__(depth=depth, name=name))

-    def html(self, depth = 2, collapse = True) -> HTML:
-        return HTML(node_tree_to_html(self, depth = depth, collapse = collapse))
+    def html(
+        self,
+        depth=2,
+        collapse=True,
+        name: str | None = None,
+        info: Callable[[Qube], str] | None = None,
+    ) -> HTML:
+        node = self
+        if name is not None:
+            node = node.replace(key=name)
+        return HTML(
+            node_tree_to_html(node=node, depth=depth, collapse=collapse, info=info)
+        )

    def _repr_html_(self) -> str:
        return node_tree_to_html(self, depth=2, collapse=True)

-    def __or__(self, other: "Qube") -> "Qube":
-        return set_operations.operation(self, other, set_operations.SetOperation.UNION)
+    # Allow "key=value/value" / qube to prepend keys
+    def __rtruediv__(self, other: str) -> Qube:
+        key, values = other.split("=")
+        values_enum = QEnum((values.split("/")))
+        return Qube.make_root([Qube.make_node(key, values_enum, self.children)])

+    def __or__(self, other: Qube) -> Qube:
+        return set_operations.operation(
+            self, other, set_operations.SetOperation.UNION, type(self)
+        )

-    def __getitem__(self, args) -> 'Qube':
+    def __and__(self, other: Qube) -> Qube:
+        return set_operations.operation(
+            self, other, set_operations.SetOperation.INTERSECTION, type(self)
+        )
+
+    def __sub__(self, other: Qube) -> Qube:
+        return set_operations.operation(
+            self, other, set_operations.SetOperation.DIFFERENCE, type(self)
+        )
+
+    def __xor__(self, other: Qube) -> Qube:
+        return set_operations.operation(
+            self, other, set_operations.SetOperation.SYMMETRIC_DIFFERENCE, type(self)
+        )
+
+    def leaves(self) -> Iterable[dict[str, str]]:
+        """
+        Yield one {key: value} dict per leaf beneath this node.
+
+        Every value of this node is paired with each leaf produced by its
+        children; a node keyed "root" is left out of the dicts it passes up.
+        """
+        at_root = self.key == "root"
+        for value in self.values:
+            if not self.children:
+                # Childless node: each value is a leaf in its own right
+                yield {self.key: value}
+                continue
+            for child in self.children:
+                for leaf in child.leaves():
+                    yield leaf if at_root else {self.key: value, **leaf}
+
+    def leaf_nodes(self) -> "Iterable[tuple[dict[str, str], Qube]]":
+        for value in self.values:
+            if not self.children:
+                yield ({self.key: value}, self)
+            for child in self.children:
+                for leaf in child.leaf_nodes():
+                    if self.key != "root":
+                        yield ({self.key: value, **leaf[0]}, leaf[1])
+                    else:
+                        yield leaf
+
+    def leaves_with_metadata(
+        self, indices=()
+    ) -> Iterator[tuple[dict[str, str], dict[str, str | np.ndarray]]]:
+        if self.key == "root":
+            for c in self.children:
+                yield from c.leaves_with_metadata(indices=())
+            return
+
+        for index, value in enumerate(self.values):
+            indexed_metadata = {
+                k: vs[indices + (index,)] for k, vs in self.metadata.items()
+            }
+            indexed_metadata = {
+                k: v.item() if v.shape == () else v for k, v in indexed_metadata.items()
+            }
+            if not self.children:
+                yield {self.key: value}, indexed_metadata
+
+            for child in self.children:
+                for leaf, metadata in child.leaves_with_metadata(
+                    indices=indices + (index,)
+                ):
+                    if self.key != "root":
+                        yield {self.key: value, **leaf}, metadata | indexed_metadata
+                    else:
+                        yield leaf, metadata
+
+    def datacubes(self) -> Iterable[dict[str, Any | list[Any]]]:
+        def to_list_of_cubes(node: Qube) -> Iterable[dict[str, Any | list[Any]]]:
+            if node.key == "root":
+                for c in node.children:
+                    yield from to_list_of_cubes(c)
+
+            else:
+                if not node.children:
+                    yield {node.key: list(node.values)}
+
+                for c in node.children:
+                    for sub_cube in to_list_of_cubes(c):
+                        yield {node.key: list(node.values)} | sub_cube
+
+        return to_list_of_cubes(self)
+
+    def __getitem__(self, args) -> Qube:
+        if isinstance(args, str):
+            specifiers = args.split(",")
+            current = self
+            for specifier in specifiers:
+                key, values_str = specifier.split("=")
+                values = values_str.split("/")
+                for c in current.children:
+                    if c.key == key and set(values) == set(c.values):
+                        current = c
+                        break
+                else:
+                    raise KeyError(
+                        f"Key '{key}' not found in children of '{current.key}', available keys are {[c.key for c in current.children]}"
+                    )
+            return Qube.make_root(current.children)
+
+        elif isinstance(args, tuple) and len(args) == 2:
            key, value = args
            for c in self.children:
                if c.key == key and value in c.values:
-                data = dataclasses.replace(c.data, values = QEnum((value,)))
-                return dataclasses.replace(c, data = data)
-        raise KeyError(f"Key {key} not found in children of {self.key}")
+                    return Qube.make_root(c.children)
+            raise KeyError(f"Key '{key}' not found in children of {self.key}")
+        else:
+            raise ValueError(f"Unknown key type {args}")

    @cached_property
    def n_leaves(self) -> int:
-        return len(self.values) * (sum(c.n_leaves for c in self.children) if self.children else 1)
+        # This line makes the equation q.n_leaves + r.n_leaves == (q | r).n_leaves true if q and r have no overlap
+        if self.key == "root" and not self.children:
+            return 0
+        return len(self.values) * (
+            sum(c.n_leaves for c in self.children) if self.children else 1
+        )

    @cached_property
    def n_nodes(self) -> int:
+        if self.key == "root" and not self.children:
+            return 0
        return 1 + sum(c.n_nodes for c in self.children)

-    def transform(self, func: 'Callable[[Qube], Qube | list[Qube]]') -> 'Qube':
+    def transform(self, func: "Callable[[Qube], Qube | Iterable[Qube]]") -> Qube:
        """
        Call a function on every node of the Qube, return one or more nodes.
        If multiple nodes are returned they each get a copy of the (transformed) children of the original node.
        Any changes to the children of a node will be ignored.
        """
+
        def transform(node: Qube) -> list[Qube]:
-            children = [cc for c in node.children for cc in transform(c)]
+            children = tuple(sorted(cc for c in node.children for cc in transform(c)))
            new_nodes = func(node)
            if isinstance(new_nodes, Qube):
                new_nodes = [new_nodes]

-            return [dataclasses.replace(new_node, children = children)
-                    for new_node in new_nodes]
+            return [new_node.replace(children=children) for new_node in new_nodes]

        children = tuple(cc for c in self.children for cc in transform(c))
-        return dataclasses.replace(self, children = children)
+        return self.replace(children=children)

+    def remove_by_key(self, keys: str | list[str]):
+        _keys: list[str] = keys if isinstance(keys, list) else [keys]

-    def select(self, selection : dict[str, str | list[str]], mode: Literal["strict", "relaxed"] = "relaxed") -> 'Qube':
-        # make all values lists
-        selection = {k : v if isinstance(v, list) else [v] for k,v in selection.items()}
+        def remove_key(node: Qube) -> Qube:
+            children: list[Qube] = []
+            for c in node.children:
+                if c.key in _keys:
+                    grandchildren = tuple(sorted(remove_key(cc) for cc in c.children))
+                    grandchildren = remove_key(Qube.make_root(grandchildren)).children
+                    children.extend(grandchildren)
+                else:
+                    children.append(remove_key(c))

-        def not_none(xs): return tuple(x for x in xs if x is not None)
+            return node.replace(children=tuple(sorted(children)))

-        def select(node: Qube) -> Qube | None: 
-            # Check if the key is specified in the selection
+        return remove_key(self).compress()
+
+    def convert_dtypes(self, converters: dict[str, Callable[[Any], Any]]):
+        def convert(node: Qube) -> Qube:
+            if node.key in converters:
+                converter = converters[node.key]
+                values = [converter(v) for v in node.values]
+                new_node = node.replace(values=QEnum(values))
+                return new_node
+            return node
+
+        return self.transform(convert)
+
+    def select(
+        self,
+        selection: dict[str, str | list[str] | Callable[[Any], bool]],
+        mode: Literal["strict", "relaxed", "next_level"] = "relaxed",
+        consume=False,
+    ) -> Qube:
+        """
+        Return a copy of this Qube filtered by `selection`.
+
+        selection maps a key to a single value, a list of values, or a predicate
+        called on each value of a node with that key.
+        mode decides what happens to nodes whose key is not in the selection:
+        "strict" prunes them, "next_level" keeps them without children and records
+        an "is_leaf" metadata entry, "relaxed" keeps them and carries on downwards.
+        With consume=True a key is dropped from the selection passed to a node's
+        children once that node has been visited, and a childless node reached
+        while selection entries remain is pruned.
+
+        Raises ValueError for an unknown mode or an unsupported selection value.
+        """
+        # Find any bare str values and replace them with [str]
+        _selection: dict[str, list[str] | Callable[[Any], bool]] = {}
+        for k, v in selection.items():
+            if isinstance(v, list):
+                _selection[k] = v
+            elif callable(v):
+                _selection[k] = v
+            else:
+                _selection[k] = [v]
+
+        def not_none(xs):
+            return tuple(x for x in xs if x is not None)
+
+        def select(
+            node: Qube,
+            selection: dict[str, list[str] | Callable[[Any], bool]],
+            matched: bool,
+        ) -> Qube | None:
+            # If this node has no children but there are still parts of the request
+            # that have not been consumed, then prune this whole branch
+            if consume and not node.children and selection:
+                return None
+
+            # If the key isn't in the selection then what we do depends on the mode:
+            # In strict mode we just stop here
+            # In next_level mode we include the next level down so you can tell what keys to add next
+            # In relaxed mode we skip the key if it's not in the request and carry on
            if node.key not in selection:
                if mode == "strict":
                    return None
-                return dataclasses.replace(node, children = not_none(select(c) for c in node.children))

+                elif mode == "next_level":
+                    # Use the metadata of the node being returned (not that of the
+                    # Qube select() was called on), matching the next_level path below
+                    return node.replace(
+                        children=(),
+                        metadata=node.metadata
+                        | {"is_leaf": np.array([not bool(node.children)])},
+                    )
+
+                elif mode == "relaxed":
+                    pass
+                else:
+                    raise ValueError(f"Unknown mode argument {mode}")
+
+            # If the key IS in the selection then check if the values match
+            if node.key in _selection:
                # If the key is specified, check if any of the values match
-            values = QEnum((c for c in selection[node.key] if c in node.values))
+                selection_criteria = _selection[node.key]
+                if callable(selection_criteria):
+                    values = QEnum((c for c in node.values if selection_criteria(c)))
+                elif isinstance(selection_criteria, list):
+                    values = QEnum((c for c in selection_criteria if c in node.values))
+                else:
+                    raise ValueError(f"Unknown selection type {selection_criteria}")

+                # Here modes don't matter because we've explicitly filtered on this key and found nothing
                if not values:
                    return None

-            data = dataclasses.replace(node.data, values = values)
-            return dataclasses.replace(node, data=data, children = not_none(select(c) for c in node.children))
+                matched = True
+                node = node.replace(values=values)

-        return dataclasses.replace(self, children = not_none(select(c) for c in self.children))
+            if consume:
+                selection = {k: v for k, v in selection.items() if k != node.key}
+
+            # Prune nodes that have had all their children pruned
+            new_children = not_none(
+                select(c, selection, matched) for c in node.children
+            )
+
+            if node.children and not new_children:
+                return None
+
+            metadata = dict(node.metadata)
+
+            if mode == "next_level":
+                metadata["is_leaf"] = np.array([not bool(node.children)])
+
+            return node.replace(
+                children=new_children,
+                metadata=metadata,
+            )
+
+        return self.replace(
+            children=not_none(
+                select(c, _selection, matched=False) for c in self.children
+            )
+        )

    def span(self, key: str) -> list[str]:
        """
@ -162,95 +607,29 @@ class Qube:
        for c in self.children:
            for k, v in c.axes().items():
                axes[k].update(v)
+        if self.key != "root":
            axes[self.key].update(self.values)
        return dict(axes)

-    @staticmethod
-    def _insert(position: "Qube", identifier : list[tuple[str, list[str]]]):
-        """
-        This algorithm goes as follows:
-        We're at a particular node in the Qube, and we have a list of key-values pairs that we want to insert.
-        We take the first key values pair
-        key, values = identifier.pop(0)
+    def axes_info(self, depth=0) -> dict[str, AxisInfo]:
+        axes = defaultdict(
+            lambda: AxisInfo(key="", type=str, depths=set(), values=set())
+        )
+        for c in self.children:
+            for k, info in c.axes_info(depth=depth + 1).items():
+                axes[k].combine(info)

-        The general idea is to insert key, values into the current node and use recursion to handle the rest of the identifier.
+        if self.key != "root":
+            axes[self.key].combine(
+                AxisInfo(
+                    key=self.key,
+                    type=type(next(iter(self.values))),
+                    depths={depth},
+                    values=set(self.values),
+                )
+            )

-        We have two sources of values with possible overlap. The values to insert and the values attached to the children of this node.
-        For each value coming from either source we put it in one of three categories:
-            1) Values that exist only in the already existing child. (Coming exclusively from position.children)
-            2) Values that exist in both a child and the new values.
-            3) Values that exist only in the new values.
-            
-
-        Thus we add the values to insert to a set, and loop over the children.
-        For each child we partition its values into the three categories.
-
-        For 1) we create a new child node with the key, reduced set of values and the same children.
-        For 2)
-            Create a new child node with the key, and the values in group 2
-            Recurse to compute the children
-
-        Once we have finished looping over children we know all the values left over came exclusively from the new values.
-        So we:
-            Create a new node with these values.
-            Recurse to compute the children
-
-        Finally we return the node with all these new children.
-        """
-        pass
-        # if not identifier:
-        #     return position
-
-        # key, values = identifier.pop(0)
-        # # print(f"Inserting {key}={values} into {position.summary()}")
-
-        # # Only the children with the matching key are relevant.
-        # source_children = {c : [] for c in position.children if c.key == key}
-        # new_children = []
-
-        # values = set(values)
-        # for c in source_children:
-        #     values_set = set(c.values)
-        #     group_1 = values_set - values
-        #     group_2 = values_set & values
-        #     values = values - values_set # At the end of this loop values will contain only the new values
-
-        #     if group_1:
-        #         group_1_node = Qube.make(c.key, QEnum((group_1)), c.children)
-        #         new_children.append(group_1_node) # Add the unaffected part of this child
-            
-        #     if group_2:
-        #         new_node = Qube.make(key, QEnum((affected)), [])
-        #         new_node = Qube._insert(new_node, identifier)
-        #         new_children.append(new_node) # Add the affected part of this child
-
-
-        #     unaffected = [x for x in c.values if x not in affected]
-
-
-        #     if affected: # This check is not technically necessary, but it makes the code more readable
-
-
-        # # If there are any values not in any of the existing children, add them as a new child
-        # if entirely_new_values:
-        #     new_node = Qube.make(key, QEnum((entirely_new_values)), [])
-        #     new_children.append(Qube._insert(new_node, identifier))
-
-        return Qube.make(position.key, position.values, new_children)
-
-    def insert(self, identifier : dict[str, list[str]]) -> 'Qube':
-        insertion = [(k, v) for k, v in identifier.items()]
-        return Qube._insert(self, insertion)
-    
-    def to_list_of_cubes(self):
-        def to_list_of_cubes(node: Qube) -> list[list[Qube]]:
-            return [[node] + sub_cube for c in node.children for sub_cube in to_list_of_cubes(c)]
-
-        return to_list_of_cubes(self)
-
-    def info(self):
-        cubes = self.to_list_of_cubes()
-        print(f"Number of distinct paths: {len(cubes)}")
+        return dict(axes)

    @cached_property
    def structural_hash(self) -> int:
@ -258,46 +637,56 @@ class Qube:
        This hash takes into account the key, values and children's key values recursively.
        Because nodes are immutable, we only need to compute this once.
        """
+
        def hash_node(node: Qube) -> int:
-            return hash((node.key, node.values, tuple(c.structural_hash for c in node.children)))
+            return hash(
+                (node.key, node.values, tuple(c.structural_hash for c in node.children))
+            )

        return hash_node(self)

-    def compress(self) -> "Qube":
-        # First compress the children
-        new_children = [child.compress() for child in self.children]
+    def compress(self) -> Qube:
+        """
+        This method is quite computationally heavy because of trees like this:
+        root, class=d1, generation=1
+        ├── time=0600, many identical keys, param=8,78,79
+        ├── time=0600, many identical keys, param=8,78,79
+        └── time=0600, many identical keys, param=8,78,79
+        This tree compresses down to a single branch.

-        # Now take the set of new children and see if any have identical key, metadata and children
-        # the values may different and will be collapsed into a single node
-        identical_children = defaultdict(set)
-        for child in new_children:
-            # only care about the key and children of each node, ignore values
-            key = hash((child.key, tuple((cc.structural_hash for cc in child.children))))
-            identical_children[key].add(child)
+        """

-        # Now go through and create new compressed nodes for any groups that need collapsing
-        new_children = []
-        for child_set in identical_children.values():
-            if len(child_set) > 1:
-                child_set = list(child_set)
-                key = child_set[0].key
-
-                # Compress the children into a single node
-                assert all(isinstance(child.data.values, QEnum) for child in child_set), "All children must have QEnum values"
-                
-                node_data = NodeData(
-                    key = key,
-                    metadata = frozendict(), # Todo: Implement metadata compression
-                    values = QEnum((v for child in child_set for v in child.data.values.values)),
+        def union(a: Qube, b: Qube) -> Qube:
+            b = type(self).make_root(children=(b,))
+            out = set_operations.operation(
+                a, b, set_operations.SetOperation.UNION, type(self)
            )
-                new_child = Qube(data = node_data, children = child_set[0].children)
-            else:
-                # If the group is size one just keep it
-                new_child = child_set.pop()
+            return out

-            new_children.append(new_child)
-
-        return Qube(
-            data = self.data,
-            children = tuple(sorted(new_children))
+        new_children = [c.compress() for c in self.children]
+        if len(new_children) > 1:
+            new_children = list(
+                functools.reduce(union, new_children, Qube.empty()).children
            )
+
+        return self.replace(children=tuple(sorted(new_children)))
+
+    def add_metadata(self, **kwargs: Any) -> Qube:
+        """
+        Build metadata from the keyword arguments and hand it to `self.replace`.
+
+        Each keyword becomes a metadata key whose value is wrapped in a
+        one-element numpy array. The result of `self.replace(metadata=...)`
+        is returned.
+        """
+        # `**kwargs: Any` annotates each individual value; the previous
+        # `dict[str, Any]` wrongly declared every value to be a dict itself.
+        metadata = {k: np.array([v]) for k, v in kwargs.items()}
+        return self.replace(metadata=metadata)
+
+    def strip_metadata(self) -> Qube:
+        """Return `self.transform(...)` applied with a callback that empties a node's metadata."""
+        # The callback swaps a node's metadata for an empty frozendict and
+        # leaves every other field to `replace`.
+        return self.transform(lambda node: node.replace(metadata=frozendict({})))
+
+    def display(self):
+        """Display this Qube by passing it to the `_display` helper; returns nothing."""
+        _display(self)
--- a/src/python/qubed/init.py
+++ b/src/python/qubed/init.py
@ -1 +1,4 @@
+from . import protobuf
 from .Qube import Qube
+
+__all__ = ["Qube", "protobuf"]
--- a/src/python/qubed/main.py
+++ b/src/python/qubed/main.py
@ -1,42 +1,124 @@
-import argparse
+import json
+import time

-# A simple command line app that reads from standard input and writes to standard output
-# Arguments:
-#    --input_format=fdb/mars
-#    --output_format=text/html
-import sys
+import click
+import psutil
+from rich.console import Console
+from rich.layout import Layout
+from rich.live import Live
+from rich.panel import Panel
+from rich.spinner import Spinner
+from rich.text import Text
+
+from qubed import Qube
+from qubed.convert import parse_fdb_list
+
+console = Console(stderr=True)
+process = psutil.Process()
+
+PRINT_INTERVAL = 0.25


+@click.group()
 def main():
-    parser = argparse.ArgumentParser(description="Generate a compressed tree from various inputs.")
+    """Command-line tool for working with trees."""
+    pass

-    parser.add_argument(
-        "--input_format",
-        choices=["fdb", "mars"],
+
+@main.command()
+@click.option(
+    "--input",
+    type=click.File("r"),
+    default="-",
+    help="Specify the input file (default: standard input).",
+)
+@click.option(
+    "--output",
+    type=click.File("w"),
+    default="-",
+    help="Specify the output file (default: standard output).",
+)
+@click.option(
+    "--from",
+    "from_format",
+    type=click.Choice(["fdb", "mars"]),
    default="fdb",
-        help="Specify the input format (fdb list or mars)."
+    help="Specify the input format: fdb (fdb list --porcelain) or mars (mars list).",
 )
-    
-    parser.add_argument(
-        "--output_format",
-        choices=["text", "html"],
+@click.option(
+    "--to",
+    "to_format",
+    type=click.Choice(["text", "html", "json"]),
    default="text",
-        help="Specify the output format (text or html)."
+    help="Specify the output format: text, html, json.",
 )
+def convert(input, output, from_format, to_format):
+    """Convert trees from one format to another."""
+    # The docstring above doubles as the click help text, so the detailed
+    # notes live in comments instead.
+    #
+    # Reads datacubes from `input` with parse_fdb_list, unions them one by one
+    # into a single Qube while showing a live progress dashboard on the stderr
+    # console, then writes the Qube to `output` encoded as `to_format`.
+    #
+    # NOTE(review): `from_format` is accepted but never consulted here, so
+    # "--from mars" input is still parsed by parse_fdb_list -- confirm intent.
+    q = Qube.empty()
+    t = time.time()
+    i0 = 0
+    n0 = 0
+    depth = 5
+    log = Text()
+    summary = Layout()
+    summary.split_column(
+        Layout(name="upper"),
+        Layout(name="qube"),
+    )
+    summary["upper"].split_row(
+        Layout(name="performance"),
+        Layout(log, name="log"),
+    )
+    spinner = Spinner("aesthetic", text="Performance", speed=0.3)

-    args = parser.parse_args()
+    with Live(summary, auto_refresh=False, transient=True, console=console) as live:
+        for i, datacube in enumerate(parse_fdb_list(input)):
+            new_branch = Qube.from_datacube(datacube)
+            q = q | new_branch

-    # Read from standard input
-    l = 0
-    for line in sys.stdin.readlines():
-        l += 1
+            # Measure the interval once so both rates below share the same
+            # denominator.
+            elapsed = time.time() - t
+            if elapsed > PRINT_INTERVAL:
+                tree = q.__str__(depth=depth)
+                # Adapt the printed depth so the tree stays between 5 and 20 lines.
+                if tree.count("\n") > 20:
+                    depth -= 1
+                if tree.count("\n") < 5:
+                    depth += 1

+                n_leaves = q.n_leaves
+                summary["performance"].update(
+                    Panel(
+                        Text.assemble(
+                            f"The Qube has {n_leaves} leaves and {q.n_nodes} internal nodes so far.\n",
+                            # Rate = items since the last refresh / seconds since
+                            # the last refresh. The previous extra division by
+                            # PRINT_INTERVAL inflated this figure fourfold.
+                            f"{(i - i0) / elapsed:.0f} lines per second.  ",
+                            f"{(n_leaves - n0) / elapsed:.0f} leaves per second.\n",
+                            f"Memory usage: {process.memory_info().rss / 1024 / 1024:.0f} MB\n",
+                        ),
+                        title=spinner.render(time.time()),
+                        border_style="blue",
+                    )
+                )
+                summary["qube"].update(
+                    Panel(tree, title=f"Qube (depth {depth})", border_style="blue")
+                )
+                summary["log"].update(
+                    Panel(
+                        f"{datacube}", border_style="blue", title="Last Datacube Added"
+                    )
+                )
+                live.refresh()
+                # Reset the counters so the next refresh reports rates for its
+                # own interval only.
+                i0 = i
+                n0 = n_leaves
+                t = time.time()

-    # Process data (For now, just echoing the input)
-    output_data = f"[Input Format: {args.input_format}] [Output Format: {args.output_format}]\n{l} lines read from standard input\n"
+    if to_format == "text":
+        output_content = str(q)
+    elif to_format == "json":
+        output_content = json.dumps(q.to_json())
+    elif to_format == "html":
+        output_content = q.html().html
+    else:
+        output_content = str(q)
+
+    output.write(output_content)

-    # Write to standard output
-    sys.stdout.write(output_data)

 if __name__ == "__main__":
    main()
--- a/src/python/qubed/convert.py
+++ b/src/python/qubed/convert.py
@ -0,0 +1,29 @@
+def parse_key_value_pairs(text: str):
+    """
+    Parse an identifier such as "{class=od,expver=0001}{date=20200101/20200102}".
+
+    Braces are dropped, the "}{" segment separators are treated like commas and
+    every "key=v1/v2/..." pair becomes an entry mapping key -> list of values.
+
+    Returns a dict of str -> list[str]; blank input yields an empty dict.
+    Raises ValueError if a non-blank segment contains no "=".
+    """
+    # "}{" separates segments of one identifier; the remaining braces are only
+    # outer delimiters and carry no information.
+    text = text.replace("}{", ",").replace("{", "").replace("}", "").strip()
+
+    result = {}
+    for segment in text.split(","):
+        if not segment.strip():
+            # Empty input or a stray comma: nothing to record.
+            continue
+        # partition splits at the first "=" only, so values may contain "=".
+        key, sep, values_str = segment.partition("=")
+        if not sep:
+            # Fail with a clear message instead of printing to stdout (which is
+            # the default output stream) and then crashing on tuple unpacking.
+            raise ValueError(f"Expected 'key=value' but got {segment!r}")
+        result[key] = values_str.split("/")
+
+    return result
+
+
+def parse_fdb_list(f):
+    """
+    Lazily yield one parsed dict per recognised line of `f`.
+
+    Two line shapes are recognised:
+      * lines starting with "{" (normal `fdb list` output), parsed whole;
+      * lines starting with "retrieve," (`fdb list --compact`), parsed after
+        the prefix is removed. A line holding nothing but the prefix is skipped.
+    All other lines are ignored.
+
+    `f` is iterated line by line instead of being read with readlines(), so
+    results are produced while input is still arriving.
+    """
+    compact_prefix = "retrieve,"
+    for line in f:
+        # Handle fdb list normal
+        if line.startswith("{"):
+            yield parse_key_value_pairs(line)
+            continue
+
+        # handle fdb list --compact
+        if line.startswith(compact_prefix):
+            request = line[len(compact_prefix) :]
+            # A bare "retrieve," (with or without a line ending) has no
+            # key=value pairs to parse.
+            if request.strip():
+                yield parse_key_value_pairs(request)
--- a/src/python/qubed/fdb_schema/init.py
+++ b/src/python/qubed/fdb_schema/init.py
@ -1 +0,0 @@
-from .fdb_schema_parser import FDBSchema, FDBSchemaFile, KeySpec, Key
--- a/src/python/qubed/fdb_schema/fdb_schema_parser.py
+++ b/src/python/qubed/fdb_schema/fdb_schema_parser.py
@ -1,375 +0,0 @@
-import dataclasses
-import json
-from dataclasses import dataclass, field
-from typing import Any
-
-import pe
-from pe.actions import Pack
-from pe.operators import Class, Star
-
-from .fdb_types import FDB_type_to_implementation, FDBType
-
-
-@dataclass(frozen=True)
-class KeySpec:
-    """
-    Represents the specification of a single key in an FDB schema file. For example in
-    ```
-    [ class, expver, stream=lwda, date, time, domain?
-       [ type=ofb/mfb/oai
-               [ obsgroup, reportype ]]]
-    ```
-    class, expver, type=ofdb/mfb/oai etc are the KeySpecs
-
-    These can have additional information such as: flags like `domain?`, allowed values like `type=ofb/mfb/oai`
-    or specify type information with `date: ClimateMonthly`
-
-    """
-
-    key: str
-    type: FDBType = field(default_factory=FDBType)
-    flag: str | None = None
-    values: tuple = field(default_factory=tuple)
-    comment: str = ""
-
-    def __repr__(self):
-        repr = self.key
-        if self.flag:
-            repr += self.flag
-        # if self.type:
-        #     repr += f":{self.type}"
-        if self.values:
-            repr += "=" + "/".join(self.values)
-        return repr
-
-    def matches(self, key, value):
-        # Sanity check!
-        if self.key != key:
-            return False
-
-        # Some keys have a set of allowed values type=ofb/mfb/oai
-        if self.values:
-            if value not in self.values:
-                return False
-
-        # Check the formatting of values like Time or Date
-        if self.type and not self.type.validate(value):
-            return False
-
-        return True
-
-    def is_optional(self):
-        if self.flag is None:
-            return False
-        return "?" in self.flag
-
-    def is_allable(self):
-        if self.flag is None:
-            return False
-        return "*" in self.flag
-
-
-@dataclass(frozen=True)
-class Comment:
-    "Represents a comment node in the schema"
-
-    value: str
-
-
-@dataclass(frozen=True)
-class FDBSchemaTypeDef:
-    "Mapping between FDB schema key names and FDB Schema Types, i.e expver is of type Expver"
-
-    key: str
-    type: str
-
-
-# This is the schema grammar written in PEG format
-fdb_schema = pe.compile(
-    r"""
-    FDB < Line+ EOF
-    Line < Schema / Comment / TypeDef / empty
-
-    # Comments
-    Comment <- "#" ~non_eol*
-    non_eol              <- [\x09\x20-\x7F] / non_ascii
-    non_ascii            <- [\x80-\uD7FF\uE000-\U0010FFFF]
-
-    # Default Type Definitions
-    TypeDef < String ":" String ";"
-
-    # Schemas are the main attraction
-    # They're a tree of KeySpecs.
-    Schema < "[" KeySpecs (","? Schema)* "]"
-
-    # KeySpecs can be just a name i.e expver
-    # Can also have a type expver:int
-    # Or a flag expver?
-    # Or values expver=xxx
-    KeySpecs < KeySpec_ws ("," KeySpec_ws)*
-    KeySpec_ws < KeySpec
-    KeySpec <- key:String (flag:Flag)? (type:Type)? (values:Values)? ([ ]* comment:Comment)?
-    Flag <- ~("?" / "-" / "*")
-    Type <- ":" [ ]* String
-    Values <- "=" Value ("/" Value)*
-
-    # Low level stuff 
-    Value   <- ~([-a-zA-Z0-9_]+)
-    String   <- ~([a-zA-Z0-9_]+)
-    EOF  <- !.
-    empty <- ""
-    """,
-    actions={
-        "Schema": Pack(tuple),
-        "KeySpec": KeySpec,
-        "Values": Pack(tuple),
-        "Comment": Comment,
-        "TypeDef": FDBSchemaTypeDef,
-    },
-    ignore=Star(Class("\t\f\r\n ")),
-    # flags=pe.DEBUG,
-)
-
-
-def post_process(entries):
-    "Take the raw output from the PEG parser and split it into type definitions and schema entries."
-    typedefs = {}
-    schemas = []
-    for entry in entries:
-        match entry:
-            case c if isinstance(c, Comment):
-                pass
-            case t if isinstance(t, FDBSchemaTypeDef):
-                typedefs[t.key] = t.type
-            case s if isinstance(s, tuple):
-                schemas.append(s)
-            case _:
-                raise ValueError
-    return typedefs, tuple(schemas)
-
-
-def determine_types(types, node):
-    "Recursively walk a schema tree and insert the type information."
-    if isinstance(node, tuple):
-        return [determine_types(types, n) for n in node]
-    return dataclasses.replace(node, type=types.get(node.key, FDBType()))
-
-
-@dataclass
-class Key:
-    key: str
-    value: Any
-    key_spec: KeySpec
-    reason: str
-
-    def str_value(self):
-        return self.key_spec.type.format(self.value)
-
-    def __bool__(self):
-        return self.reason in {"Matches", "Skipped", "Select All"}
-
-    def emoji(self):
-        return {"Matches": "✅", "Skipped": "⏭️", "Select All": "★"}.get(
-            self.reason, "❌"
-        )
-
-    def info(self):
-        return f"{self.emoji()} {self.key:<12}= {str(self.value):<12} ({self.key_spec}) {self.reason if not self else ''}"
-
-    def __repr__(self):
-        return f"{self.key}={self.key_spec.type.format(self.value)}"
-
-    def as_json(self):
-        return dict(
-            key=self.key,
-            value=self.str_value(),
-            reason=self.reason,
-        )
-
-
-class FDBSchema:
-    """
-    Represents a parsed FDB Schema file.
-    Has methods to validate and convert request dictionaries to a mars request form with validation and type information.
-    """
-
-    def __init__(self, string, defaults: dict[str, str] = {}):
-        """
-        1. Use a PEG parser on a schema string,
-        2. Separate the output into schemas and typedefs
-        3. Insert any concrete implementations of types from fdb_types.py defaulting to generic string type
-        4. Walk the schema tree and annotate it with type information.
-        """
-        m = fdb_schema.match(string)
-        g = list(m.groups())
-        self._str_types, schemas = post_process(g)
-        self.types = {
-            key: FDB_type_to_implementation[type]
-            for key, type in self._str_types.items()
-        }
-        self.schemas = determine_types(self.types, schemas)
-        self.defaults = defaults
-
-    def __repr__(self):
-        return json.dumps(
-            dict(schemas=self.schemas, defaults=self.defaults), indent=4, default=repr
-        )
-
-    @classmethod
-    def consume_key(
-        cls, key_spec: KeySpec, request: dict[str, Any]
-    ) -> Key:
-        key = key_spec.key
-        try:
-            value = request[key]
-        except KeyError:
-            if key_spec.is_optional():
-                return Key(key_spec.key, "", key_spec, "Skipped")
-            if key_spec.is_allable():
-                return Key(key_spec.key, "", key_spec, "Select All")
-            else:
-                return Key(
-                    key_spec.key, "", key_spec, "Key Missing"
-                )
-
-        if key_spec.matches(key, value):
-            return Key(
-                key_spec.key,
-                key_spec.type.parse(value),
-                key_spec,
-                "Matches",
-            )
-        else:
-            return Key(
-                key_spec.key, value, key_spec, "Incorrect Value"
-            )
-
-    @classmethod
-    def _DFS_match(
-        cls, tree: list, request: dict[str, Any]
-    ) -> tuple[bool | list, list[Key]]:
-        """Do a DFS on the schema tree, returning the deepest matching path
-        At each stage return whether we matched on this path, and the path itself.
-
-        When traversing the tree there are three cases to consider:
-        1. base case []
-        2. one schema [k, k, k, [k, k, k]]
-        3. list of schemas [[k,k,k], [k,k,k], [k,k,k]]
-        """
-        #  Case 1: Base Case
-        if not tree:
-            return True, []
-
-        # Case 2: [k, k, k, [k, k, k]]
-        if isinstance(tree[0], KeySpec):
-            node, *tree = tree
-            # Check if this node is in the request
-            match_result = cls.consume_key(node, request)
-
-            # If if isn't then terminate this path here
-            if not match_result:
-                return False, [match_result,]  # fmt: skip
-
-            # Otherwise continue walking the tree and return the best result
-            matched, path = cls._DFS_match(tree, request)
-
-            # Don't put the key in the path if it's optional and we're skipping it.
-            if match_result.reason != "Skipped":
-                path = [match_result,] + path  # fmt: skip
-
-            return matched, path
-
-        # Case 3: [[k, k, k], [k, k, k]]
-        branches = []
-        for branch in tree:
-            matched, branch_path = cls._DFS_match(branch, request)
-
-            # If this branch matches, terminate the DFS and use this.
-            if matched:
-                return branch, branch_path
-            else:
-                branches.append(branch_path)
-
-        # If no branch matches, return the one with the deepest match
-        return False, max(branches, key=len)
-
-    @classmethod
-    def _DFS_match_all(
-        cls, tree: list, request: dict[str, Any]
-    ) -> list[list[Key]]:
-        """Do a DFS on the schema tree, returning all matching paths or partial matches.
-        At each stage return all matching paths and the deepest partial matches.
-
-        When traversing the tree there are three cases to consider:
-        1. base case []
-        2. one schema [k, k, k, [k, k, k]]
-        3. list of schemas [[k,k,k], [k,k,k], [k,k,k]]
-        """
-        # Case 1: Base Case
-        if not tree:
-            return [[]]
-
-        # Case 2: [k, k, k, [k, k, k]]
-        if isinstance(tree[0], KeySpec):
-            node, *tree = tree
-            # Check if this node is in the request
-            request_values = request.get(node.key, None)
-
-            if request_values is None:
-                # If the key is not in the request, return a partial match with Key Missing
-                return [[Key(node.key, "", node, "Key Missing")]]
-
-            # If the request value is a list, try to match each value
-            if isinstance(request_values, list):
-                all_matches = []
-                for value in request_values:
-                    match_result = cls.consume_key(node, {node.key: value})
-
-                    if match_result:
-                        sub_matches = cls._DFS_match_all(tree, request)
-                        for match in sub_matches:
-                            if match_result.reason != "Skipped":
-                                match.insert(0, match_result)
-                            all_matches.append(match)
-
-                return all_matches if all_matches else [[Key(node.key, "", node, "No Match Found")]]
-            else:
-                # Handle a single value
-                match_result = cls.consume_key(node, request)
-
-                # If it isn't then return a partial match with Key Missing
-                if not match_result:
-                    return [[Key(node.key, "", node, "Key Missing")]]
-
-                # Continue walking the tree and get all matches
-                all_matches = cls._DFS_match_all(tree, request)
-
-                # Prepend the current match to all further matches
-                for match in all_matches:
-                    if match_result.reason != "Skipped":
-                        match.insert(0, match_result)
-
-                return all_matches
-
-        # Case 3: [[k, k, k], [k, k, k]]
-        all_branch_matches = []
-        for branch in tree:
-            branch_matches = cls._DFS_match_all(branch, request)
-            all_branch_matches.extend(branch_matches)
-
-        # Return all of the deepest partial matches or complete matches
-        return all_branch_matches
-
-    def match_all(self, request: dict[str, Any]):
-        request = request | self.defaults
-        return self._DFS_match_all(self.schemas, request)
-
-    def match(self, request: dict[str, Any]):
-        request = request | self.defaults
-        return self._DFS_match(self.schemas, request)
-
-
-class FDBSchemaFile(FDBSchema):
-    def __init__(self, path: str):
-        with open(path, "r") as f:
-            return super().__init__(f.read())
--- a/src/python/qubed/fdb_schema/fdb_types.py
+++ b/src/python/qubed/fdb_schema/fdb_types.py
@ -1,83 +0,0 @@
-from dataclasses import dataclass
-from typing import Any
-import re
-from collections import defaultdict
-from datetime import datetime, date, time
-
-
-@dataclass(repr=False)
-class FDBType:
-    """
-    Holds information about how to format and validate a given FDB Schema type like Time or Expver
-    This base type represents a string and does no validation or formatting. It's the default type.
-    """
-
-    name: str = "String"
-
-    def __repr__(self) -> str:
-        return self.name
-
-    def validate(self, s: Any) -> bool:
-        try:
-            self.parse(s)
-            return True
-        except (ValueError, AssertionError):
-            return False
-
-    def format(self, s: Any) -> str:
-        return str(s).lower()
-
-    def parse(self, s: str) -> Any:
-        return s
-
-
-@dataclass(repr=False)
-class Expver_FDBType(FDBType):
-    name: str = "Expver"
-
-    def parse(self, s: str) -> str:
-        assert bool(re.match(".{4}", s))
-        return s
-
-
-@dataclass(repr=False)
-class Time_FDBType(FDBType):
-    name: str = "Time"
-    time_format = "%H%M"
-
-    def format(self, t: time) -> str:
-        return t.strftime(self.time_format)
-
-    def parse(self, s: datetime | str | int) -> time:
-        if isinstance(s, str):
-            assert len(s) == 4
-            return datetime.strptime(s, self.time_format).time()
-        if isinstance(s, datetime):
-            return s.time()
-        return self.parse(f"{s:04}")
-
-
-@dataclass(repr=False)
-class Date_FDBType(FDBType):
-    name: str = "Date"
-    date_format: str = "%Y%m%d"
-
-    def format(self, d: Any) -> str:
-        if isinstance(d, date):
-            return d.strftime(self.date_format)
-        if isinstance(d, int):
-            return f"{d:08}"
-        else:
-            return d
-
-    def parse(self, s: datetime | str | int) -> date:
-        if isinstance(s, str):
-            return datetime.strptime(s, self.date_format).date()
-        elif isinstance(s, datetime):
-            return s.date()
-        return self.parse(f"{s:08}")
-
-
-FDB_type_to_implementation = defaultdict(lambda: FDBType()) | {
-    cls.name: cls() for cls in [Expver_FDBType, Time_FDBType, Date_FDBType]
-}
--- a/src/python/qubed/metadata.py
+++ b/src/python/qubed/metadata.py
@ -0,0 +1,43 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Iterator
+
+import numpy as np
+
+from .value_types import QEnum
+
+if TYPE_CHECKING:
+    from .Qube import Qube
+
+
+def make_node(
+    cls,
+    key: str,
+    values: Iterator,
+    shape: list[int],
+    children: tuple[Qube, ...],
+    metadata: dict[str, np.ndarray] | None = None,
+):
+    """
+    Build a node through `cls.make_node`, wrapping `values` in a QEnum.
+
+    Each metadata entry is converted to a numpy array and reshaped to `shape`;
+    when `metadata` is None an empty dict is passed instead.
+    """
+    enum_values = QEnum(values)
+
+    if metadata is None:
+        reshaped = {}
+    else:
+        reshaped = {
+            name: np.array(raw).reshape(shape) for name, raw in metadata.items()
+        }
+
+    return cls.make_node(
+        key=key,
+        values=enum_values,
+        metadata=reshaped,
+        children=children,
+    )
+
+
+def from_nodes(cls, nodes, add_root=True):
+    """
+    Build a single chain of nodes from the mapping `nodes` (key -> node info).
+
+    The last entry becomes the innermost node and each earlier entry wraps the
+    chain built so far as its only child. The node at position d (1-based) is
+    given the lengths of the first d entries' "values" as its metadata shape.
+    With `add_root` the chain is wrapped by `cls.make_root`.
+    """
+    sizes = [len(info["values"]) for info in nodes.values()]
+    *ancestors, (leaf_key, leaf_info) = nodes.items()
+
+    # Innermost node first: it sees the full shape and has no children.
+    chain = make_node(cls, shape=sizes, children=(), key=leaf_key, **leaf_info)
+
+    # Walk outwards; every step up drops the trailing dimension.
+    for depth in range(len(ancestors), 0, -1):
+        parent_key, parent_info = ancestors[depth - 1]
+        chain = make_node(
+            cls, shape=sizes[:depth], children=(chain,), key=parent_key, **parent_info
+        )
+
+    return cls.make_root(children=(chain,)) if add_root else chain
--- a/src/python/qubed/node_types.py
+++ b/src/python/qubed/node_types.py
@ -1,22 +0,0 @@
-from dataclasses import dataclass, field
-from typing import Hashable
-
-from frozendict import frozendict
-
-from .value_types import Values
-
-
-@dataclass(frozen=True, eq=True, order=True)
-class NodeData:
-    key: str
-    values: Values
-    metadata: dict[str, tuple[Hashable, ...]] = field(default_factory=frozendict, compare=False)
-
-    def summary(self) -> str:
-        return f"{self.key}={self.values.summary()}" if self.key != "root" else "root"
-    
-@dataclass(frozen=True, eq=True, order=True)
-class RootNodeData(NodeData):
-    "Helper class to print a custom root name"
-    def summary(self) -> str:
-        return self.key
--- a/src/python/qubed/protobuf/init.py
+++ b/src/python/qubed/protobuf/init.py
--- a/src/python/qubed/protobuf/adapters.py
+++ b/src/python/qubed/protobuf/adapters.py
@ -0,0 +1,109 @@
+from __future__ import annotations
+
+import warnings
+from typing import TYPE_CHECKING
+
+import numpy as np
+from frozendict import frozendict
+
+from ..value_types import QEnum
+
+with warnings.catch_warnings():
+    warnings.filterwarnings(
+        "ignore",
+        "Protobuf gencode version",
+        UserWarning,
+        "google.protobuf.runtime_version",
+    )
+    from . import qube_pb2
+
+
+if TYPE_CHECKING:
+    from ..Qube import Qube
+
+
+def _ndarray_to_proto(arr: np.ndarray) -> qube_pb2.NdArray:
+    """Pack a numpy array into an NdArray message: shape, dtype name and C-order bytes."""
+    dims = list(arr.shape)
+    dtype_name = str(arr.dtype)
+    payload = arr.tobytes(order="C")
+    return qube_pb2.NdArray(shape=dims, dtype=dtype_name, raw=payload)
+
+
+def _ndarray_from_proto(msg: qube_pb2.NdArray) -> np.ndarray:
+    """Rebuild a numpy array from an NdArray message's raw bytes, dtype and shape."""
+    # np.frombuffer interprets the bytes in place rather than copying them.
+    flat = np.frombuffer(msg.raw, dtype=msg.dtype)
+    return flat.reshape(tuple(msg.shape))
+
+
+def _py_to_valuegroup(value: list[str] | np.ndarray) -> qube_pb2.ValueGroup:
+    """Build a ValueGroup: ndarrays fill the `tensor` arm, anything else extends `s.items`."""
+    group = qube_pb2.ValueGroup()
+    if not isinstance(value, np.ndarray):
+        group.s.items.extend(value)
+        return group
+
+    group.tensor.CopyFrom(_ndarray_to_proto(value))
+    return group
+
+
+def _valuegroup_to_py(vg: qube_pb2.ValueGroup) -> QEnum | np.ndarray:
+    """ValueGroup → ndarray for the "tensor" arm, otherwise a QEnum of the string items."""
+    # The return annotation previously promised list[str], but the string arm
+    # has always been wrapped in a QEnum.
+    if vg.WhichOneof("payload") == "tensor":
+        return _ndarray_from_proto(vg.tensor)
+
+    # Every other case falls through to the string group.
+    return QEnum(vg.s.items)
+
+
+def _py_to_metadatagroup(value: np.ndarray) -> qube_pb2.MetadataGroup:
+    """Wrap a metadata value in a MetadataGroup; non-ndarray input becomes a one-element array."""
+    group = qube_pb2.MetadataGroup()
+    tensor = value if isinstance(value, np.ndarray) else np.array([value])
+    group.tensor.CopyFrom(_ndarray_to_proto(tensor))
+    return group
+
+
+def _metadatagroup_to_py(vg: qube_pb2.MetadataGroup) -> np.ndarray:
+    """MetadataGroup → ndarray; raises ValueError when the payload arm is not "tensor"."""
+    arm = vg.WhichOneof("payload")
+    if arm != "tensor":
+        raise ValueError(f"Unknown arm {arm}")
+
+    return _ndarray_from_proto(vg.tensor)
+
+
+def _qube_to_proto(q: Qube) -> qube_pb2.Qube:
+    """Recursively convert a Qube and all of its children into a new protobuf Qube message."""
+    values = _py_to_valuegroup(q.values)
+    metadata = {name: _py_to_metadatagroup(arr) for name, arr in q.metadata.items()}
+    children = [_qube_to_proto(child) for child in q.children]
+    return qube_pb2.Qube(
+        key=q.key,
+        values=values,
+        metadata=metadata,
+        children=children,
+        is_root=q.is_root,
+    )
+
+
+def qube_to_proto(q: Qube) -> bytes:
+    """Serialise a Qube to bytes by converting it to a protobuf message first."""
+    message = _qube_to_proto(q)
+    return message.SerializeToString()
+
+
+def _proto_to_qube(cls: type, msg: qube_pb2.Qube) -> Qube:
+    """Recursively rebuild a node and its subtree from a protobuf Qube message via `cls.make_node`."""
+    values = _valuegroup_to_py(msg.values)
+    metadata = frozendict(
+        {name: _metadatagroup_to_py(group) for name, group in msg.metadata.items()}
+    )
+    children = tuple(_proto_to_qube(cls, child) for child in msg.children)
+
+    return cls.make_node(
+        key=msg.key,
+        values=values,
+        metadata=metadata,
+        children=children,
+        is_root=msg.is_root,
+    )
+
+
+def proto_to_qube(cls: type, wire: bytes) -> Qube:
+    """Parse serialised protobuf bytes and rebuild the Qube tree with `cls`."""
+    message = qube_pb2.Qube()
+    message.ParseFromString(wire)
+    return _proto_to_qube(cls, message)
--- a/src/python/qubed/protobuf/qube_pb2.py
+++ b/src/python/qubed/protobuf/qube_pb2.py
@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# NO CHECKED-IN PROTOBUF GENCODE
+# source: qube.proto
+# Protobuf Python Version: 5.29.0
+"""Generated protocol buffer code."""
+
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import runtime_version as _runtime_version
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+
+_runtime_version.ValidateProtobufRuntimeVersion(
+    _runtime_version.Domain.PUBLIC, 5, 29, 0, "", "qube.proto"
+)
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
+    b'\n\nqube.proto"4\n\x07NdArray\x12\r\n\x05shape\x18\x01 \x03(\x03\x12\r\n\x05\x64type\x18\x02 \x01(\t\x12\x0b\n\x03raw\x18\x03 \x01(\x0c"\x1c\n\x0bStringGroup\x12\r\n\x05items\x18\x01 \x03(\t"N\n\nValueGroup\x12\x19\n\x01s\x18\x01 \x01(\x0b\x32\x0c.StringGroupH\x00\x12\x1a\n\x06tensor\x18\x02 \x01(\x0b\x32\x08.NdArrayH\x00\x42\t\n\x07payload"6\n\rMetadataGroup\x12\x1a\n\x06tensor\x18\x01 \x01(\x0b\x32\x08.NdArrayH\x00\x42\t\n\x07payload"\xd1\x01\n\x04Qube\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x1b\n\x06values\x18\x02 \x01(\x0b\x32\x0b.ValueGroup\x12%\n\x08metadata\x18\x03 \x03(\x0b\x32\x13.Qube.MetadataEntry\x12\r\n\x05\x64type\x18\x04 \x01(\t\x12\x17\n\x08\x63hildren\x18\x05 \x03(\x0b\x32\x05.Qube\x12\x0f\n\x07is_root\x18\x06 \x01(\x08\x1a?\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x1d\n\x05value\x18\x02 \x01(\x0b\x32\x0e.MetadataGroup:\x02\x38\x01\x62\x06proto3'
+)
+
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "qube_pb2", _globals)
+if not _descriptor._USE_C_DESCRIPTORS:
+    DESCRIPTOR._loaded_options = None
+    _globals["_QUBE_METADATAENTRY"]._loaded_options = None
+    _globals["_QUBE_METADATAENTRY"]._serialized_options = b"8\001"
+    _globals["_NDARRAY"]._serialized_start = 14
+    _globals["_NDARRAY"]._serialized_end = 66
+    _globals["_STRINGGROUP"]._serialized_start = 68
+    _globals["_STRINGGROUP"]._serialized_end = 96
+    _globals["_VALUEGROUP"]._serialized_start = 98
+    _globals["_VALUEGROUP"]._serialized_end = 176
+    _globals["_METADATAGROUP"]._serialized_start = 178
+    _globals["_METADATAGROUP"]._serialized_end = 232
+    _globals["_QUBE"]._serialized_start = 235
+    _globals["_QUBE"]._serialized_end = 444
+    _globals["_QUBE_METADATAENTRY"]._serialized_start = 381
+    _globals["_QUBE_METADATAENTRY"]._serialized_end = 444
+# @@protoc_insertion_point(module_scope)
--- a/src/python/qubed/py.typed
+++ b/src/python/qubed/py.typed
--- a/src/python/qubed/set_operations.py
+++ b/src/python/qubed/set_operations.py
@ -1,75 +1,464 @@
-import dataclasses
+"""
+# Set Operations
+
+The core of this is the observation that for two sets A and B, if we compute (A - B), (A ∩ B) and (B - A)
+then we can get the other operations by taking unions of the above three objects.
+Union: All of them
+Intersection: Just take A ∩ B
+Difference: Take either A - B or B - A
+Symmetric Difference (XOR): Take A - B and B - A
+
+We start with a shallow implementation of this algorithm that only deals with a pair of nodes, not the whole tree:
+
+shallow_set_operation(A: Qube, B: Qube) -> SetOpResult
+
+This takes two qubes and (morally) returns (A - B), (A ∩ B) and (B - A) but only for the values and metadata at the top level.
+
+For technical reasons that will become clear we actually return a struct with two copies of (A ∩ B). One has the metadata from A and the children of A call it A', and the other has them from B call it B'. This is relevant when we extend the shallow algorithm to work with a whole tree because we will recurse and compute the set operation for each pair of the children of A' and B'.
+
+NB: Currently there are two kinds of values, QEnums, that store a list of values and Wildcards that 'match with everything'. shallow_set_operation checks the type of values and dispatches to different methods depending on the combination of types it finds.
+
+"""
+
+from __future__ import annotations
+
 from collections import defaultdict
+from dataclasses import dataclass
 from enum import Enum

 # Prevent circular imports while allowing the type checker to know what Qube is
-from typing import TYPE_CHECKING, Iterable
+from typing import TYPE_CHECKING, Any, Iterable

-from .node_types import NodeData
-from .value_types import QEnum, Values
+import numpy as np
+from frozendict import frozendict
+
+from .value_types import QEnum, ValueGroup, WildcardGroup

 if TYPE_CHECKING:
-    from .qube import Qube
+    from .Qube import Qube


 class SetOperation(Enum):
+    "Map from set operations to which combination of (A - B), (A ∩ B), (B - A) we need."
+
    UNION = (1, 1, 1)
    INTERSECTION = (0, 1, 0)
    DIFFERENCE = (1, 0, 0)
    SYMMETRIC_DIFFERENCE = (1, 0, 1)

-def fused_set_operations(A: "Values", B: "Values") -> tuple[list[Values], list[Values], list[Values]]:
-    if isinstance(A, QEnum) and isinstance(B, QEnum):
-        set_A, set_B = set(A), set(B)
-        intersection = set_A & set_B
-        just_A = set_A - intersection
-        just_B = set_B - intersection
-        return [QEnum(just_A),], [QEnum(intersection),], [QEnum(just_B),]
+
+@dataclass(eq=True, frozen=True)
+class ValuesIndices:
+    """
+    Helper class to hold the values and indices from a node.
+
+    The two fields are parallel: `indices[i]` belongs to the i-th value of `values`.
+    `from_values` numbers the values 0..len(values)-1; the set operations then carry
+    those original positions along so that `get_indices` can select the matching
+    metadata entries later.
+    """
+
+    # The group of values taken from a node
+    values: ValueGroup
+    # For each value, its position in the node the values were taken from
+    indices: tuple[int, ...]
+
+    @classmethod
+    def from_values(cls, values: ValueGroup) -> ValuesIndices:
+        "Wrap `values`, numbering them 0, 1, ..., len(values) - 1."
+        return cls(values=values, indices=tuple(range(len(values))))
+
+    @classmethod
+    def empty(cls) -> ValuesIndices:
+        "Return an instance holding an empty QEnum and no indices."
+        return cls(values=QEnum([]), indices=())
+
+    def enumerate(self) -> Iterable[tuple[int, Any]]:
+        "Yield (index, value) pairs, in that order, like the builtin enumerate."
+        return zip(self.indices, self.values)


-    raise NotImplementedError("Fused set operations on values types other than QEnum are not yet implemented")
+def get_indices(
+    metadata: frozendict[str, np.ndarray], indices: tuple[int, ...]
+) -> frozendict[str, np.ndarray]:
+    """
+    Given a metadata dict and some indices, return a new metadata dict with only the values indexed by the indices.
+
+    The selection is applied to the last axis of every array (`v[..., indices]`).
+    Entries whose value is not a numpy array are dropped from the result.
+    """
+    return frozendict(
+        {k: v[..., indices] for k, v in metadata.items() if isinstance(v, np.ndarray)}
+    )

-def operation(A: "Qube", B : "Qube", operation_type: SetOperation) -> "Qube":
-    assert A.key == B.key, "The two Qube root nodes must have the same key to perform set operations," \
+
+@dataclass(eq=True, frozen=True)
+class SetOpResult:
+    """
+    Given two sets A and B, all possible set operations can be constructed from A - B, A ∩ B, B - A.
+    That is, what's only in A, the intersection and what's only in B.
+    However, because we need to recurse on children, we actually return two intersection entries.
+
+    Each field is a ValuesIndices (values plus their original indices); the caller
+    uses it together with the source node to build the corresponding qube.
+
+    only_A describes a qube with:
+        The values in A but not in B
+        The metadata corresponding to these values
+        All the children A had
+
+    intersection_A describes a qube with:
+      The values that intersected with B
+      The metadata from that intersection
+      All the children A had
+
+    And vice versa for only_B and intersection_B.
+    """
+
+    # NOTE: the field order below is relied upon by positional construction
+    # in shallow_wildcard_set_operation.
+    only_A: ValuesIndices
+    intersection_A: ValuesIndices
+    intersection_B: ValuesIndices
+    only_B: ValuesIndices
+
+
+def shallow_qenum_set_operation(A: ValuesIndices, B: ValuesIndices) -> SetOpResult:
+    """
+    For two sets of values, partition the overlap into four groups:
+    only_A: values and indices of values that are in A but not B
+    intersection_A: values and indices of values that are in both A and B
+    And vice versa for only_B and intersection_B.
+
+    Note that intersection_A and intersection_B contain the same values but the indices are different.
+
+    Values must be hashable because they are used as dictionary keys. If a value
+    occurs more than once on one side, the index of its last occurrence is kept.
+    """
+
+    # create four groups that map value -> index
+    only_A: dict[Any, int] = {val: i for i, val in A.enumerate()}
+    only_B: dict[Any, int] = {val: i for i, val in B.enumerate()}
+    intersection_A: dict[Any, int] = {}
+    intersection_B: dict[Any, int] = {}
+
+    # Go through all the values and move any that are in the intersection
+    # to the corresponding group, keeping the indices.
+    # Membership is tested against the only_B dict (constant time per lookup)
+    # instead of B.values, which is a linear scan of a tuple. Iterating over a
+    # snapshot of only_A's keys also means each value is popped at most once,
+    # so a repeated value cannot trigger a KeyError.
+    for val in list(only_A):
+        if val in only_B:
+            intersection_A[val] = only_A.pop(val)
+            intersection_B[val] = only_B.pop(val)
+
+    def package(values_indices: dict[Any, int]) -> ValuesIndices:
+        # Turn a value -> index mapping back into a ValuesIndices
+        return ValuesIndices(
+            values=QEnum(list(values_indices.keys())),
+            indices=tuple(values_indices.values()),
+        )
+
+    return SetOpResult(
+        only_A=package(only_A),
+        only_B=package(only_B),
+        intersection_A=package(intersection_A),
+        intersection_B=package(intersection_B),
+    )
+
+
+def shallow_wildcard_set_operation(A: ValuesIndices, B: ValuesIndices) -> SetOpResult:
+    """
+    WildcardGroups behave as if they contain all the values of whatever they match against.
+    For two wildcards we just return both as the intersection.
+    For A == wildcard and B == enum we have to be more careful:
+        1. All of B is in the intersection so only_B is empty.
+        2. The wildcard may need to match against other things so only_A is A
+        3. We return B in the intersection_B and intersection_A slot.
+
+    This last bit happens because the wildcard basically adopts the values of whatever it sees.
+    The case B == wildcard and A == enum is handled symmetrically.
+
+    Raises NotImplementedError if neither A nor B holds a WildcardGroup.
+    """
+    # SetOpResult is built positionally below; the argument order is
+    # (only_A, intersection_A, intersection_B, only_B).
+
+    # Two wildcard groups have full overlap.
+    if isinstance(A.values, WildcardGroup) and isinstance(B.values, WildcardGroup):
+        return SetOpResult(ValuesIndices.empty(), A, B, ValuesIndices.empty())
+
+    # If A is a wildcard matcher and B is not
+    # then the intersection is everything from B
+    if isinstance(A.values, WildcardGroup):
+        return SetOpResult(A, B, B, ValuesIndices.empty())
+
+    # If B is a wildcard matcher and A is not
+    # then the intersection is everything from A
+    if isinstance(B.values, WildcardGroup):
+        return SetOpResult(ValuesIndices.empty(), A, A, B)
+
+    raise NotImplementedError(
+        f"One of {type(A.values)} and {type(B.values)} should be WildCardGroup"
+    )
+
+
+def shallow_set_operation(
+    A: ValuesIndices,
+    B: ValuesIndices,
+) -> SetOpResult:
+    """
+    Compute the shallow (single level) set operation between two groups of values.
+
+    Dispatches on the value types: two QEnums go to shallow_qenum_set_operation,
+    anything involving a WildcardGroup goes to shallow_wildcard_set_operation.
+
+    Raises NotImplementedError for any other combination of value types.
+    """
+    if isinstance(A.values, QEnum) and isinstance(B.values, QEnum):
+        return shallow_qenum_set_operation(A, B)
+
+    # WildcardGroups behave as if they contain all possible values.
+    if isinstance(A.values, WildcardGroup) or isinstance(B.values, WildcardGroup):
+        return shallow_wildcard_set_operation(A, B)
+
+    raise NotImplementedError(
+        f"Set operations on values types {type(A.values)} and {type(B.values)} not yet implemented"
+    )
+
+
+def operation(
+    A: Qube, B: Qube, operation_type: SetOperation, node_type, depth=0
+) -> Qube | None:
+    # print(f"operation({A}, {B})")
+    assert A.key == B.key, (
+        "The two Qube root nodes must have the same key to perform set operations,"
        f"would usually be two root nodes. They have {A.key} and {B.key} respectively"
+    )
+    node_key = A.key

-    assert A.values == B.values, f"The two Qube root nodes must have the same values to perform set operations {A.values = }, {B.values = }"
+    assert A.is_root == B.is_root
+    is_root = A.is_root
+
+    assert A.values == B.values, (
+        f"The two Qube root nodes must have the same values to perform set operations {A.values = }, {B.values = }"
+    )
+    node_values = A.values

    # Group the children of the two nodes by key
-    nodes_by_key = defaultdict(lambda : ([], []))
+    nodes_by_key: defaultdict[str, tuple[list[Qube], list[Qube]]] = defaultdict(
+        lambda: ([], [])
+    )
+    new_children: list[Qube] = []
+
+    # Sort out metadata into what can stay at this level and what must move down
+    stayput_metadata: dict[str, np.ndarray] = {}
+    pushdown_metadata_A: dict[str, np.ndarray] = {}
+    pushdown_metadata_B: dict[str, np.ndarray] = {}
+    for key in set(A.metadata.keys()) | set(B.metadata.keys()):
+        if key not in A.metadata:
+            pushdown_metadata_B[key] = B.metadata[key]
+            continue
+
+        if key not in B.metadata:
+            pushdown_metadata_A[key] = A.metadata[key]
+            continue
+
+        A_val = A.metadata[key]
+        B_val = B.metadata[key]
+        if np.allclose(A_val, B_val):
+            # print(f"{'  ' * depth}Keeping metadata key '{key}' at this level")
+            stayput_metadata[key] = A.metadata[key]
+        else:
+            # print(f"{'  ' * depth}Pushing down metadata key '{key}' {A_val} {B_val}")
+            pushdown_metadata_A[key] = A_val
+            pushdown_metadata_B[key] = B_val
+
+    # Add all the metadata that needs to be pushed down to the child nodes
+    # When pushing down the metadata we need to account for the fact it now affects more values
+    # So expand the metadata entries from shape (a, b, ..., c) to (a, b, ..., c, d)
+    # where d is the length of the node values
    for node in A.children:
+        N = len(node.values)
+        meta = {
+            k: np.broadcast_to(v[..., np.newaxis], v.shape + (N,))
+            for k, v in pushdown_metadata_A.items()
+        }
+        node = node.replace(metadata=node.metadata | meta)
        nodes_by_key[node.key][0].append(node)
+
    for node in B.children:
+        N = len(node.values)
+        meta = {
+            k: np.broadcast_to(v[..., np.newaxis], v.shape + (N,))
+            for k, v in pushdown_metadata_B.items()
+        }
+        node = node.replace(metadata=node.metadata | meta)
        nodes_by_key[node.key][1].append(node)

-    new_children = []
+    # print(f"{nodes_by_key = }")

    # For every node group, perform the set operation
    for key, (A_nodes, B_nodes) in nodes_by_key.items():
-        new_children.extend(_operation(key, A_nodes, B_nodes, operation_type))
+        output = list(
+            _operation(A_nodes, B_nodes, operation_type, node_type, depth + 1)
+        )
+        # print(f"{'  '*depth}_operation {operation_type.name} {A_nodes} {B_nodes} out = [{output}]")
+        new_children.extend(output)
+
+    # print(f"{'  '*depth}operation {operation_type.name} [{A}] [{B}] new_children = [{new_children}]")
+
+    # If there are now no children as a result of the operation, return nothing.
+    if (A.children or B.children) and not new_children:
+        if A.key == "root":
+            return node_type.make_root(children=())
+        else:
+            return None
+
+    # Whenever we modify children we should recompress them
+    # But since `operation` is already recursive, we only need to compress this level not all levels
+    # Hence we use the non-recursive compress_children helper
+    new_children = list(compress_children(new_children))

    # The values and key are the same so we just replace the children
-    return dataclasses.replace(A, children=new_children)
+    if A.key == "root":
+        return node_type.make_root(
+            children=new_children,
+            metadata=stayput_metadata,
+        )
+    return node_type.make_node(
+        key=node_key,
+        values=node_values,
+        children=new_children,
+        metadata=stayput_metadata,
+        is_root=is_root,
+    )


-# The root node is special so we need a helper method that we can recurse on
-def _operation(key: str, A: list["Qube"], B : list["Qube"], operation_type: SetOperation) -> Iterable["Qube"]:
+def _operation(
+    A: list[Qube],
+    B: list[Qube],
+    operation_type: SetOperation,
+    node_type,
+    depth: int,
+) -> Iterable[Qube]:
+    """
+    This operation assumes that we've found two nodes that match and now want to do a set operation on their children. Hence we take in two lists of child nodes all of which have the same key but different values.
+    We then loop over all pairs of children from each list and compute the intersection.
+    """
+    # print(f"_operation({A}, {B})")
+    keep_only_A, keep_intersection, keep_only_B = operation_type.value
+
+    # We're going to progressively remove values from the starting nodes as we do intersections
+    # So we make a node -> ValuesIndices mapping here for both a and b
+    only_a: dict[Qube, ValuesIndices] = {
+        n: ValuesIndices.from_values(n.values) for n in A
+    }
+    only_b: dict[Qube, ValuesIndices] = {
+        n: ValuesIndices.from_values(n.values) for n in B
+    }
+
+    def make_new_node(source: Qube, values_indices: ValuesIndices):
+        return source.replace(
+            values=values_indices.values,
+            metadata=get_indices(source.metadata, values_indices.indices),
+        )
+
+    # Iterate over all pairs (node_A, node_B) and perform the shallow set operation
+    # Update our copy of the original node to remove anything that appears in an intersection
    for node_a in A:
        for node_b in B:
-            just_A, intersection, just_B = fused_set_operations(
-                node_a.values, 
-                node_b.values
+            set_ops_result = shallow_set_operation(only_a[node_a], only_b[node_b])
+
+            # Save reduced values back to nodes
+            only_a[node_a] = set_ops_result.only_A
+            only_b[node_b] = set_ops_result.only_B
+
+            if (
+                set_ops_result.intersection_A.values
+                and set_ops_result.intersection_B.values
+            ):
+                result = operation(
+                    make_new_node(node_a, set_ops_result.intersection_A),
+                    make_new_node(node_b, set_ops_result.intersection_B),
+                    operation_type,
+                    node_type,
+                    depth=depth + 1,
+                )
+                if result is not None:
+                    # If we're doing a difference or xor we might want to throw away the intersection
+                    # However we can only do this once we get to the leaf nodes, otherwise we'll
+                    # throw away nodes too early!
+                    # Consider Qube(root, a=1, b=1/2) - Qube(root, a=1, b=1)
+                    # We can easily throw away the whole a node by accident here!
+                    if keep_intersection or result.children:
+                        yield result
+            elif (
+                not set_ops_result.intersection_A.values
+                and not set_ops_result.intersection_B.values
+            ):
+                continue
+            else:
+                raise ValueError(
+                    f"Only one of set_ops_result.intersection_A and set_ops_result.intersection_B is None, I didn't think that could happen! {set_ops_result = }"
                )
-            for values in just_A:
-                data = NodeData(key, values, {})
-                yield type(node_a)(data, node_a.children)

-            if intersection:
-                intersected_children = operation(node_a, node_b, operation_type)
-                for values in intersection:
-                    data = NodeData(key, values, {})
-                    yield type(node_a)(data, intersected_children)
+    if keep_only_A:
+        for node, vi in only_a.items():
+            if vi.values:
+                yield make_new_node(node, vi)

-            for values in just_B:
-                data = NodeData(key, values, {})
-                yield type(node_a)(data, node_b.children)
+    if keep_only_B:
+        for node, vi in only_b.items():
+            if vi.values:
+                yield make_new_node(node, vi)
+
+
+def compress_children(children: Iterable[Qube], depth=0) -> tuple[Qube, ...]:
+    """
+    Helper method that only compresses a set of nodes, and doesn't do it recursively.
+    Used in Qubed.compress but also to maintain compression in the set operations above.
+
+    Nodes that share the same key and the same child structure are merged into a
+    single node whose values are the union of the group's values.
+    Returns the resulting nodes as a tuple sorted by (key, minimum value).
+
+    `depth` is currently unused; it is only referenced by the commented-out
+    recursive call below.
+
+    Raises ValueError if a group to be merged has a value type other than
+    QEnum or WildcardGroup.
+    """
+    # Take the set of new children and see if any have identical key and children
+    # the values may differ and will be collapsed into a single node
+    # (metadata is not part of the grouping hash computed below)
+
+    identical_children = defaultdict(list)
+    for child in children:
+        # only care about the key and children of each node, ignore values
+        h = hash((child.key, tuple((cc.structural_hash for cc in child.children))))
+        identical_children[h].append(child)
+
+    # Now go through and create new compressed nodes for any groups that need collapsing
+    new_children = []
+    for child_list in identical_children.values():
+        # If the group is size one just keep it
+        if len(child_list) == 1:
+            new_child = child_list.pop()
+
+        else:
+            example = child_list[0]
+            node_type = type(example)
+            value_type = type(example.values)
+
+            assert all(isinstance(child.values, value_type) for child in child_list), (
+                f"All nodes to be grouped must have the same value type, expected {value_type}"
+            )
+
+            # We know the children of this group of nodes all have the same structure
+            # but we still need to merge the metadata across them
+            # children = example.children
+            # Note: this rebinds the `children` parameter; that is safe here only
+            # because the input was fully consumed by the grouping loop above.
+            children = merge_metadata(child_list, example.depth)
+
+            # Do we need to recursively compress here?
+            # children = compress_children(children, depth=depth+1)
+
+            if value_type is QEnum:
+                # NOTE(review): the merged QEnum is built without a dtype argument,
+                # so it gets QEnum's default dtype - confirm this is intended.
+                values = QEnum(set(v for child in child_list for v in child.values))
+            elif value_type is WildcardGroup:
+                values = example.values
+            else:
+                raise ValueError(f"Unknown value type: {value_type}")
+
+            # NOTE(review): only the first node's own metadata is kept although the
+            # values of the whole group are merged - confirm whether the group's
+            # metadata should be combined here as well.
+            new_child = node_type.make_node(
+                key=example.key,
+                metadata=example.metadata,
+                values=values,
+                children=children,
+            )
+
+        new_children.append(new_child)
+
+    return tuple(sorted(new_children, key=lambda n: ((n.key, n.values.min()))))
+
+
+def merge_metadata(qubes: list[Qube], axis) -> Iterable[Qube]:
+    """
+    Given a list of qubes with identical structure,
+    match up the children of each node and merge the metadata
+
+    For every child position i, the i-th children of all `qubes` are grouped and
+    each metadata array is concatenated across the group along `axis`.
+    The same `axis` is used at every level of the recursion.
+
+    This is a generator: it yields one merged node per child of `qubes[0]`, and
+    the nested children are passed on to `make_node` as a (lazy) generator too.
+    The key and values of each merged node are taken from the first qube's child.
+    """
+    # Group the children of each qube and merge them
+    # Exploit the fact that they have the same shape and ordering
+    example = qubes[0]
+    node_type = type(example)
+
+    for i in range(len(example.children)):
+        group = [q.children[i] for q in qubes]
+        group_example = group[0]
+        # Sanity check: every member of the group must have the same structure
+        assert len(set((c.structural_hash for c in group))) == 1
+
+        # Collect metadata by key
+        # The key set comes from the first member only; a member missing one of
+        # these keys raises KeyError.
+        metadata_groups = {
+            k: [q.metadata[k] for q in group] for k in group_example.metadata.keys()
+        }
+
+        # Concatenate the metadata together
+        metadata: frozendict[str, np.ndarray] = frozendict(
+            {
+                k: np.concatenate(metadata_group, axis=axis)
+                for k, metadata_group in metadata_groups.items()
+            }
+        )
+
+        group_children = merge_metadata(group, axis)
+        yield node_type.make_node(
+            key=group_example.key,
+            metadata=metadata,
+            values=group_example.values,
+            children=group_children,
+        )
--- a/src/python/qubed/tree_formatters.py
+++ b/src/python/qubed/tree_formatters.py
@ -1,32 +1,42 @@
+from __future__ import annotations
+
 import random
 from dataclasses import dataclass
-from typing import Iterable, Protocol, Sequence, runtime_checkable
+from typing import TYPE_CHECKING, Callable, Iterable

+try:
+    from IPython.display import display
+except ImportError:
+    display = None

-@runtime_checkable
-class TreeLike(Protocol):
-    @property
-    def children(self) -> Sequence["TreeLike"]: ...  # Supports indexing like node.children[i]
+if TYPE_CHECKING:
+    from .Qube import Qube

-    def summary(self, **kwargs) -> str: ...

@dataclass(frozen=True)
-class HTML():
+class HTML:
    html: str
+
    def _repr_html_(self):
        return self.html

-def summarize_node(node: TreeLike, collapse = False, **kwargs) -> tuple[str, TreeLike]:
+
+def summarize_node(
+    node: Qube, collapse=False, max_summary_length=50, **kwargs
+) -> tuple[str, str, Qube]:
    """
    Extracts a summarized representation of the node while collapsing single-child paths.
    Returns the summary string and the last node in the chain that has multiple children.
    """
    summaries = []
+    paths = []

    while True:
        summary = node.summary(**kwargs)
-        if len(summary) > 50:
-            summary = summary[:50] + "..."
+
+        paths.append(summary)
+        if len(summary) > max_summary_length:
+            summary = summary[:max_summary_length] + "..."
        summaries.append(summary)
        if not collapse:
            break
@ -36,10 +46,15 @@ def summarize_node(node: TreeLike, collapse = False, **kwargs) -> tuple[str, Tre
            break
        node = node.children[0]

-    return ", ".join(summaries), node
+    # Add a "..." to represent nodes that we don't know about
+    if (not node.children) and (not node.is_leaf):
+        summaries.append("...")

-def node_tree_to_string(node : TreeLike, prefix : str = "", depth = None) -> Iterable[str]:
-    summary, node = summarize_node(node)
+    return ", ".join(summaries), ",".join(paths), node
+
+
+def node_tree_to_string(node: Qube, prefix: str = "", depth=None) -> Iterable[str]:
+    summary, path, node = summarize_node(node)

    if depth is not None and depth <= 0:
        yield summary + " - ...\n"
@ -56,30 +71,110 @@ def node_tree_to_string(node : TreeLike, prefix : str = "", depth = None) -> Ite
        connector = "└── " if index == len(node.children) - 1 else "├── "
        yield prefix + connector
        extension = "    " if index == len(node.children) - 1 else "│   "
-        yield from node_tree_to_string(child, prefix + extension, depth = depth - 1 if depth is not None else None)
+        yield from node_tree_to_string(
+            child, prefix + extension, depth=depth - 1 if depth is not None else None
+        )

-def _node_tree_to_html(node : TreeLike, prefix : str = "", depth = 1, connector = "", **kwargs) -> Iterable[str]:
-    summary, node = summarize_node(node, **kwargs)
+
+def summarize_node_html(
+    node: Qube,
+    collapse=False,
+    max_summary_length=50,
+    info: Callable[[Qube], str] | None = None,
+    **kwargs,
+) -> tuple[str, Qube]:
+    """
+    Extracts a summarized HTML representation of the node while collapsing single-child paths.
+    Returns the summary string and the last node in the chain that has multiple children.
+
+    Each visited node becomes a `<span class="qubed-node">` whose `data-path`
+    attribute holds the full (untruncated) summary and whose `title` attribute
+    holds the text returned by `info` (by default the node's metadata).
+    The visible text is truncated to `max_summary_length` characters plus "...".
+    Extra keyword arguments are forwarded to `node.summary`.
+
+    All node-derived text is HTML-escaped before being interpolated, so quotes,
+    `<`, `>` or `&` in values or metadata cannot break the generated markup.
+    """
+    # Local import so the block brings its own dependency into scope
+    from html import escape
+
+    if info is None:
+
+        def info_func(node: Qube, /):
+            return (
+                # f"dtype: {node.dtype}\n"
+                f"metadata: {dict(node.metadata)}\n"
+            )
+    else:
+        info_func = info
+
+    summaries = []
+
+    while True:
+        path = node.summary(**kwargs)
+        summary = path
+
+        # Truncate the raw text first so an escape sequence is never cut in half
+        if len(summary) > max_summary_length:
+            summary = summary[:max_summary_length] + "..."
+
+        info_string = info_func(node)
+
+        summary = (
+            f'<span class="qubed-node" data-path="{escape(path, quote=True)}" '
+            f'title="{escape(str(info_string), quote=True)}">{escape(summary)}</span>'
+        )
+        summaries.append(summary)
+        if not collapse:
+            break
+
+        # Move down if there's exactly one child, otherwise stop
+        if len(node.children) != 1:
+            break
+        node = node.children[0]
+
+    # A childless node that is not a leaf stands for nodes we don't know about
+    if (not node.children) and (not node.is_leaf):
+        summary = (
+            '<span class="qubed-node" data-path="" title="Truncated Nodes">...</span>'
+        )
+        summaries.append(summary)
+
+    return ", ".join(summaries), node
+
+
+def _node_tree_to_html(
+    node: Qube,
+    prefix: str = "",
+    depth=1,
+    connector="",
+    info: Callable[[Qube], str] | None = None,
+    **kwargs,
+) -> Iterable[str]:
+    summary, node = summarize_node_html(node, info=info, **kwargs)

    if len(node.children) == 0:
-        yield f'<span class="leaf">{connector}{summary}</span>'
+        yield f'<span class="qubed-level">{connector}{summary}</span>'
        return
    else:
        open = "open" if depth > 0 else ""
-        yield f"<details {open}><summary>{connector}{summary}</summary>"
+        yield f'<details {open}><summary class="qubed-level">{connector}{summary}</summary>'

    for index, child in enumerate(node.children):
        connector = "└── " if index == len(node.children) - 1 else "├── "
        extension = "    " if index == len(node.children) - 1 else "│   "
-        yield from _node_tree_to_html(child, prefix + extension, depth = depth - 1, connector = prefix+connector, **kwargs)
+        yield from _node_tree_to_html(
+            child,
+            prefix + extension,
+            depth=depth - 1,
+            connector=prefix + connector,
+            info=info,
+            **kwargs,
+        )
    yield "</details>"

-def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
+
+def node_tree_to_html(
+    node: Qube,
+    depth=1,
+    include_css=True,
+    include_js=True,
+    css_id=None,
+    info: Callable[[Qube], str] | None = None,
+    **kwargs,
+) -> str:
+    if css_id is None:
        css_id = f"qubed-tree-{random.randint(0, 1000000)}"
-        css = f"""
+
+    # It's ugly to use an f-string here because CSS uses {} so much, so instead
+    # we use CSS_ID as a placeholder and replace it later
+    css = """
        <style>
-        pre#{css_id} """ \
-        """{
+        pre#CSS_ID {
            font-family: monospace;
            white-space: pre;
            font-family: SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;
@ -90,6 +185,11 @@ def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
                margin-left: 0;
            }

+            .qubed-level a {
+                margin-left: 10px;
+                text-decoration: none;
+            }
+
            summary {
                list-style: none;
                cursor: pointer;
@ -99,7 +199,7 @@ def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
                display: block;
            }

-            summary:hover,span.leaf:hover {
+            span.qubed-node:hover {
                background-color: #f0f0f0;
            }

@ -111,7 +211,7 @@ def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:
                content: " ▼";
            }

-            .leaf {
+            .qubed-level {
                text-overflow: ellipsis;
                overflow: hidden;
                text-wrap: nowrap;
@ -125,6 +225,47 @@ def node_tree_to_html(node : TreeLike, depth = 1, **kwargs) -> str:

        }
        </style>
+        """.replace("CSS_ID", css_id)
+
+    # This js snippet copies the path of a node to the clipboard when clicked
+    js = """
+        <script type="module" defer>
+        // Alt-click handler: copies the path of the clicked node to the clipboard.
+        async function nodeOnClick(event) {
+            if (!event.altKey) return;
+            event.preventDefault();
+            let current_element = this.parentElement;
+            let paths = [];
+            // Walk up to the enclosing <pre>, collecting any data-path on the way
+            while (true) {
+                if (current_element.dataset.path) {
+                    paths.push(current_element.dataset.path);
+                }
+                current_element = current_element.parentElement;
+                if (current_element.tagName == "PRE") break;
+            }
+            const path = paths.reverse().slice(1).join(",");
+            await navigator.clipboard.writeText(path);
+        }
+
+        // The id is on the <pre>; the nodes are its descendants, hence the space.
+        const nodes = document.querySelectorAll("#CSS_ID .qubed-node");
+        nodes.forEach(n => n.addEventListener("click", nodeOnClick));
+        </script>
+        """.replace("CSS_ID", css_id)
+    nodes = "".join(_node_tree_to_html(node=node, depth=depth, info=info, **kwargs))
+    return f"{js if include_js else ''}{css if include_css else ''}<pre class='qubed-tree' id='{css_id}'>{nodes}</pre>"
+
+
+def _display(qube: Qube, **kwargs):
+    if display is None:
+        print(qube)
+    else:
+
+        def info(node: Qube):
+            return f"""\
+structural_hash = {node.structural_hash}
+metadata = {dict(node.metadata)}
+is_root = {node.is_root}
+is_leaf = {node.is_leaf}
 """
-        nodes = "".join(_node_tree_to_html(node=node, depth=depth, **kwargs))
-        return f"{css}<pre class='qubed-tree' id='{css_id}'>{nodes}</pre>"
+
+        kwargs = {"info": info} | kwargs
+        display(qube.html(**kwargs))
--- a/src/python/qubed/trie.py
+++ b/src/python/qubed/trie.py
@ -2,8 +2,9 @@ from dataclasses import dataclass, field

 character = str

+
@dataclass(unsafe_hash=True)
-class TrieNode():
+class TrieNode:
    parent: "TrieNode | None"
    parent_char: character
    children: dict[character, "TrieNode"] = field(default_factory=dict)
@ -37,4 +38,3 @@ class Trie:
            leaf_node = leaf_node.parent

        return "".join(reversed(string))
-
--- a/src/python/qubed/value_types.py
+++ b/src/python/qubed/value_types.py
@ -1,40 +1,102 @@
+from __future__ import annotations
+
 import dataclasses
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from datetime import date, datetime, timedelta
-from typing import Any, FrozenSet, Iterable, Literal, TypeVar
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    FrozenSet,
+    Iterable,
+    Iterator,
+    Literal,
+    Sequence,
+    TypeVar,
+)
+
+if TYPE_CHECKING:
+    from .Qube import Qube


@dataclass(frozen=True)
-class Values(ABC):
+class ValueGroup(ABC):
+    @abstractmethod
+    def dtype(self) -> str:
+        "Provide a string rep of the datatype of these values"
+        pass
+
    @abstractmethod
    def summary(self) -> str:
-        pass
-    @abstractmethod
-    def __len__(self) -> int:
+        "Provide a string summary of the value group."
        pass

    @abstractmethod
    def __contains__(self, value: Any) -> bool:
+        "Given a value, coerce to the value type and determine if it is in the value group."
        pass

    @abstractmethod
-    def from_strings(self, values: Iterable[str]) -> list['Values']:
+    def to_json(self) -> dict:
+        "Return a JSON serializable representation of the value group."
        pass

+    @abstractmethod
+    def min(self):
+        "Return the minimum value in the group."
+        pass
+
+    @classmethod
+    @abstractmethod
+    def from_strings(cls, values: Iterable[str]) -> Sequence[ValueGroup]:
+        "Given a list of strings, return a one or more ValueGroups of this type."
+        pass
+
+    @abstractmethod
+    def __iter__(self) -> Iterator:
+        "Iterate over the values in the group."
+        pass
+
+    @abstractmethod
+    def __len__(self) -> int:
+        pass
+
+
 T = TypeVar("T")
 EnumValuesType = FrozenSet[T]
-@dataclass(frozen=True, order=True)

-class QEnum(Values):
+_dtype_map: dict[str, type] = {
+    "str": str,
+    "int64": int,
+    "float64": float,
+    "date": datetime,
+}
+_dtype_map_inv: dict[type, str] = {v: k for k, v in _dtype_map.items()}
+_dtype_formatters = {
+    "str": str,
+    "int64": int,
+    "float64": float,
+    "date": datetime.fromisoformat,
+}
+
+
+@dataclass(frozen=True, order=True)
+class QEnum(ValueGroup):
    """
    The simplest kind of key value is just a list of strings.
    summary -> string1/string2/string....
    """
-    values: EnumValuesType

-    def __init__(self, obj):
-       object.__setattr__(self, 'values', frozenset(obj))
+    values: EnumValuesType
+    _dtype: str = "str"
+
+    def __init__(self, obj, dtype="str"):
+        object.__setattr__(self, "values", tuple(sorted(obj)))
+        object.__setattr__(
+            self,
+            "_dtype",
+            dtype,
+        )

    def __post_init__(self):
        assert isinstance(self.values, tuple)
@ -44,17 +106,98 @@ class QEnum(Values):

    def __len__(self) -> int:
        return len(self.values)
+
    def summary(self) -> str:
-        return '/'.join(map(str, sorted(self.values)))
+        return "/".join(map(str, sorted(self.values)))
+
    def __contains__(self, value: Any) -> bool:
        return value in self.values
-    def from_strings(self, values: Iterable[str]) -> list['Values']:
-        return [type(self)(tuple(values))]
+
+    def dtype(self):
+        return self._dtype
+
+    @classmethod
+    def from_strings(cls, values: Iterable[str]) -> Sequence[ValueGroup]:
+        return [cls(tuple(values))]
+
+    def min(self):
+        return min(self.values)
+
+    def to_json(self):
+        return {"type": "enum", "dtype": self.dtype(), "values": self.values}
+
+    # @classmethod
+    # def from_json(cls, type: Literal["enum"], dtype: str, values: list):
+    #     dtype_formatter = _dtype_formatters[dtype]
+
+    @classmethod
+    def from_list(cls, obj):
+        """
+        Build an enum from a list of values that all share one Python type.
+
+        The dtype name is looked up in `_dtype_map_inv` using the type of the
+        first element, so `obj` must be non-empty (an empty list raises
+        IndexError) and its element type must be a key of `_dtype_map_inv`
+        (otherwise KeyError).
+        """
+        dtype = type(obj[0])
+        # all() is essential here: asserting on the bare list comprehension is
+        # always true for a non-empty list, so mixed types were never caught.
+        assert all(type(v) is dtype for v in obj), (
+            "All values passed to from_list must have the same type."
+        )
+        return cls(obj, dtype=_dtype_map_inv[dtype])
+
+
+@dataclass(frozen=True, order=True)
+class WildcardGroup(ValueGroup):
+    """
+    A value group that reports every value as a member.
+    summary -> *
+    """
+
+    def summary(self) -> str:
+        return "*"
+
+    def __contains__(self, value: Any) -> bool:
+        # Every value is considered a member of the wildcard.
+        return True
+
+    def to_json(self):
+        return "*"
+
+    def min(self):
+        return "*"
+
+    def __len__(self):
+        # The wildcard counts as a single entry.
+        return 1
+
+    def __iter__(self):
+        # __iter__ must return an iterator; returning the bare list makes
+        # iter(group) and `for v in group` raise TypeError.
+        return iter(["*"])
+
+    def __bool__(self):
+        return True
+
+    def dtype(self):
+        return "*"
+
+    @classmethod
+    def from_strings(cls, values: Iterable[str]) -> Sequence[ValueGroup]:
+        "The input strings are ignored; the result is always a single wildcard."
+        return [WildcardGroup()]
+
+
+class DateEnum(QEnum):
+    def summary(self) -> str:
+        def fmt(d):
+            return d.strftime("%Y%m%d")
+
+        return "/".join(map(fmt, sorted(self.values)))
+

@dataclass(frozen=True)
-class Range(Values, ABC):
+class Range(ValueGroup, ABC):
    dtype: str = dataclasses.field(kw_only=True)

+    start: Any
+    end: Any
+    step: Any
+
+    def min(self):
+        return self.start
+
+    def __iter__(self) -> Iterator[Any]:
+        i = self.start
+        while i <= self.end:
+            yield i
+            i += self.step
+
+    def to_json(self):
+        return dataclasses.asdict(self)
+
+
@dataclass(frozen=True)
 class DateRange(Range):
    start: date
@ -62,55 +205,96 @@ class DateRange(Range):
    step: timedelta
    dtype: Literal["date"] = dataclasses.field(kw_only=True, default="date")

+    def __len__(self) -> int:
+        "Return the number of whole steps between start and end."
+        # NOTE(review): __iter__ and __contains__ both treat `end` as inclusive,
+        # so this is one less than the number of values iterated (and 0, hence
+        # falsy, for a one-day range) - confirm whether that is intended.
+        # NOTE(review): a zero-length step raises ZeroDivisionError here.
+        return (self.end - self.start) // self.step
+
+    def __iter__(self) -> Iterator[date]:
+        "Yield start, start + step, ... up to and including end."
+        current = self.start
+        # A positive step counts up towards `end`; any other step uses the
+        # `current >= self.end` test, i.e. counts down towards `end`.
+        # NOTE(review): with a zero-day step and start >= end this never
+        # terminates - confirm such ranges are never iterated.
+        while current <= self.end if self.step.days > 0 else current >= self.end:
+            yield current
+            current += self.step
+
    @classmethod
-    def from_strings(self, values: Iterable[str]) -> list['DateRange']:
+    def from_strings(cls, values: Iterable[str]) -> Sequence[DateRange | DateEnum]:
+        """
+        Parse "%Y%m%d" strings and group them into ranges and enums.
+
+        Runs of consecutive days become a DateRange with a one day step; the
+        dates in between such runs are collected into DateEnums. Every input
+        date ends up in exactly one of the returned groups.
+        """
+        # NOTE(review): strptime yields datetime objects while __contains__
+        # compares against a date - confirm which type start/end should hold.
        dates = sorted([datetime.strptime(v, "%Y%m%d") for v in values])
        if len(dates) < 2:
-            return [DateRange(
-                start=dates[0],
-                end=dates[0],
-                step=timedelta(days=0)
-            )]
+            return [DateEnum(dates)]

-        ranges = []
-        current_range, dates = [dates[0],], dates[1:]
+        ranges: list[DateEnum | DateRange] = []
+        current_group, dates = (
+            [
+                dates[0],
+            ],
+            dates[1:],
+        )
+        current_type: Literal["enum", "range"] = "enum"
-        while len(dates) > 1:
+        # Consume every remaining date. Stopping while one date is still
+        # unread would silently drop the last input value.
+        while dates:
-            if dates[0] - current_range[-1] == timedelta(days=1):
-                current_range.append(dates.pop(0))
+            if current_type == "range":
+                # If the next date fits then add it to the current range
+                if dates[0] - current_group[-1] == timedelta(days=1):
+                    current_group.append(dates.pop(0))

-            elif len(current_range) == 1:
-                ranges.append(DateRange(
-                start=current_range[0],
-                end=current_range[0],
-                step=timedelta(days=0)
-                ))
-                current_range = [dates.pop(0),]
+                # Emit the current range and start a new one
+                else:
+                    if len(current_group) == 1:
+                        ranges.append(DateEnum(current_group))
+                    else:
+                        ranges.append(
+                            DateRange(
+                                start=current_group[0],
+                                end=current_group[-1],
+                                step=timedelta(days=1),
+                            )
+                        )
+                    current_group = [
+                        dates.pop(0),
+                    ]
+                    current_type = "enum"
+
+            # elif rather than if: each pass consumes exactly one date, so
+            # dates[0] is never read after the list has been emptied above.
+            elif current_type == "enum":
+                # If the next date is one more than the last then switch to range mode
+                if dates[0] - current_group[-1] == timedelta(days=1):
+                    last = current_group.pop()
+                    if current_group:
+                        ranges.append(DateEnum(current_group))
+                    current_group = [last, dates.pop(0)]
+                    current_type = "range"

                else:
-                ranges.append(DateRange(
-                start=current_range[0],
-                end=current_range[-1],
-                step=timedelta(days=1)
-                ))
-                current_range = [dates.pop(0),]
+                    current_group.append(dates.pop(0))
+
+        # Handle remaining `current_group`
+        if current_group:
+            if current_type == "range":
+                ranges.append(
+                    DateRange(
+                        start=current_group[0],
+                        end=current_group[-1],
+                        step=timedelta(days=1),
+                    )
+                )
+            else:
+                ranges.append(DateEnum(current_group))
+
        return ranges

    def __contains__(self, value: Any) -> bool:
        v = datetime.strptime(value, "%Y%m%d").date()
        return self.start <= v <= self.end and (v - self.start) % self.step == 0

-
-    def __len__(self) -> int:
-        return (self.end - self.start) // self.step
-    
    def summary(self) -> str:
-        def fmt(d): return d.strftime("%Y%m%d")
+        def fmt(d):
+            return d.strftime("%Y%m%d")
+
        if self.step == timedelta(days=0):
            return f"{fmt(self.start)}"
        if self.step == timedelta(days=1):
            return f"{fmt(self.start)}/to/{fmt(self.end)}"

-        return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step // timedelta(days=1)}"
+        return (
+            f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step // timedelta(days=1)}"
+        )
+

@dataclass(frozen=True)
 class TimeRange(Range):
@ -119,44 +303,53 @@ class TimeRange(Range):
    step: int
    dtype: Literal["time"] = dataclasses.field(kw_only=True, default="time")

+    def min(self):
+        return self.start
+
+    def __iter__(self) -> Iterator[Any]:
+        return super().__iter__()
+
    @classmethod
-    def from_strings(self, values: Iterable[str]) -> list['TimeRange']:
+    def from_strings(cls, values: Iterable[str]) -> list["TimeRange"]:
+        """
+        Parse integer time strings and group them into TimeRanges.
+
+        Runs of values that differ by exactly 1 become one range with step 1;
+        every other value becomes a single-value range with step 0. Every
+        input value is covered by one of the returned ranges.
+        """
        times = sorted([int(v) for v in values])
        if len(times) < 2:
-            return [TimeRange(
-                start=times[0],
-                end=times[0],
-                step=100
-            )]
+            return [TimeRange(start=times[0], end=times[0], step=100)]

        ranges = []
-        current_range, times = [times[0],], times[1:]
+        current_range, times = (
+            [
+                times[0],
+            ],
+            times[1:],
+        )
-        while len(times) > 1:
+        # Consume every remaining value. Stopping while one value is still
+        # unread would silently drop the last input.
+        while times:
            if times[0] - current_range[-1] == 1:
                current_range.append(times.pop(0))

            elif len(current_range) == 1:
-                ranges.append(TimeRange(
-                start=current_range[0],
-                end=current_range[0],
-                step=0
-                ))
-                current_range = [times.pop(0),]
+                ranges.append(
+                    TimeRange(start=current_range[0], end=current_range[0], step=0)
+                )
+                current_range = [
+                    times.pop(0),
+                ]

            else:
-                ranges.append(TimeRange(
-                start=current_range[0],
-                end=current_range[-1],
-                step=1
-                ))
-                current_range = [times.pop(0),]
+                ranges.append(
+                    TimeRange(start=current_range[0], end=current_range[-1], step=1)
+                )
+                current_range = [
+                    times.pop(0),
+                ]
+
+        # The group still being built when the input runs out has not been
+        # emitted yet; without this a fully consecutive input returns [].
+        if len(current_range) == 1:
+            ranges.append(
+                TimeRange(start=current_range[0], end=current_range[0], step=0)
+            )
+        else:
+            ranges.append(
+                TimeRange(start=current_range[0], end=current_range[-1], step=1)
+            )
        return ranges

    def __len__(self) -> int:
        return (self.end - self.start) // self.step

    def summary(self) -> str:
-        def fmt(d): return f"{d:04d}"
+        def fmt(d):
+            return f"{d:04d}"
+
        if self.step == 0:
            return f"{fmt(self.start)}"
        return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"
@ -165,6 +358,7 @@ class TimeRange(Range):
        v = int(value)
        return self.start <= v <= self.end and (v - self.start) % self.step == 0

+
@dataclass(frozen=True)
 class IntRange(Range):
    start: int
@ -176,7 +370,11 @@ class IntRange(Range):
        return (self.end - self.start) // self.step

    def summary(self) -> str:
-        def fmt(d): return d.strftime("%Y%m%d")
+        def fmt(d):
+            return d
+
+        if self.step == 0:
+            return f"{fmt(self.start)}"
        return f"{fmt(self.start)}/to/{fmt(self.end)}/by/{self.step}"

    def __contains__(self, value: Any) -> bool:
@ -184,44 +382,62 @@ class IntRange(Range):
        return self.start <= v <= self.end and (v - self.start) % self.step == 0

    @classmethod
-    def from_strings(self, values: Iterable[str]) -> list['IntRange']:
+    def from_strings(cls, values: Iterable[str]) -> list["IntRange"]:
+        """
+        Parse integer strings and group them into IntRanges.
+
+        Runs of consecutive integers become one range with step 1; every other
+        value becomes a single-value range with step 0. Every input value is
+        covered by one of the returned ranges.
+        """
        ints = sorted([int(v) for v in values])
        if len(ints) < 2:
-            return [IntRange(
-                start=ints[0],
-                end=ints[0],
-                step=0
-            )]
+            return [IntRange(start=ints[0], end=ints[0], step=0)]

        ranges = []
-        current_range, ints = [ints[0],], ints[1:]
+        current_range, ints = (
+            [
+                ints[0],
+            ],
+            ints[1:],
+        )
-        while len(ints) > 1:
+        # Consume every remaining value. Stopping while one value is still
+        # unread would silently drop the last input.
+        while ints:
            if ints[0] - current_range[-1] == 1:
                current_range.append(ints.pop(0))

            elif len(current_range) == 1:
-                ranges.append(IntRange(
-                start=current_range[0],
-                end=current_range[0],
-                step=0
-                ))
-                current_range = [ints.pop(0),]
+                ranges.append(
+                    IntRange(start=current_range[0], end=current_range[0], step=0)
+                )
+                current_range = [
+                    ints.pop(0),
+                ]

            else:
-                ranges.append(IntRange(
-                start=current_range[0],
-                end=current_range[-1],
-                step=1
-                ))
-                current_range = [ints.pop(0),]
+                ranges.append(
+                    IntRange(start=current_range[0], end=current_range[-1], step=1)
+                )
+                current_range = [
+                    ints.pop(0),
+                ]
+
+        # The group still being built when the input runs out has not been
+        # emitted yet; without this a fully consecutive input returns [].
+        if len(current_range) == 1:
+            ranges.append(
+                IntRange(start=current_range[0], end=current_range[0], step=0)
+            )
+        else:
+            ranges.append(
+                IntRange(start=current_range[0], end=current_range[-1], step=1)
+            )
        return ranges

-def values_from_json(obj) -> Values:
-    if isinstance(obj, list): 
-        return QEnum(tuple(obj))

-    match obj["dtype"]:
-        case "date": return DateRange(**obj)
-        case "time": return TimeRange(**obj)
-        case "int": return IntRange(**obj)
-        case _: raise ValueError(f"Unknown dtype {obj['dtype']}")
+def values_from_json(obj: dict | list) -> ValueGroup:
+    """
+    Rebuild a value group from its JSON form.
+
+    A bare list is turned into a QEnum via `QEnum.from_list`. A dict is
+    dispatched on its "type" entry; only "enum" is handled.
+
+    Raises:
+        ValueError: if the dict's "type" is not recognised.
+    """
+    if isinstance(obj, list):
+        return QEnum.from_list(obj)
+
+    match obj["type"]:
+        case "enum":
+            # The result must be returned; without `return` this branch fell
+            # through and the function produced None.
+            # NOTE(review): the only QEnum.from_json visible nearby is commented
+            # out - confirm it is actually defined before relying on this path.
+            return QEnum.from_json(**obj)
+        case _:
+            # Report the key that was matched on; indexing "dtype" here could
+            # raise KeyError and mask the real problem.
+            raise ValueError(f"Unknown type {obj['type']}")
+
+
+def convert_datatypes(q: "Qube", conversions: dict[str, ValueGroup]) -> "Qube":
+    """
+    Re-encode the values of selected keys using other value group types.
+
+    `q.transform` is called with a function that, for a node whose key is in
+    `conversions`, yields one `q.replace(values=...)` per group returned by the
+    mapped entry's `from_strings`; any other node is yielded unchanged.
+    NOTE(review): `from_strings` is a classmethod, so the mapped entries are
+    presumably ValueGroup subclasses rather than instances - confirm.
+    """
+    def _convert(q: "Qube") -> Iterator["Qube"]:
+        if q.key in conversions:
+            data_type = conversions[q.key]
+            # from_strings expects strings, so only plain enums are accepted.
+            assert isinstance(q.values, QEnum), (
+                "Only QEnum values can be converted to other datatypes."
+            )
+            for values_group in data_type.from_strings(q.values):
+                # print(values_group)
+                yield q.replace(values=values_group)
+        else:
+            yield q
+
+    return q.transform(_convert)
--- a/src/qube.proto
+++ b/src/qube.proto
@ -0,0 +1,32 @@
+syntax = "proto3";
+
+message NdArray {
+  repeated int64 shape = 1;
+  string  dtype  = 2;
+  bytes   raw    = 3;
+}
+
+message StringGroup {repeated string items = 1; }
+
+// Stores values i.e class=1/2/3 the 1/2/3 part
+message ValueGroup {
+    oneof payload {
+    StringGroup s = 1;
+    NdArray tensor = 2;
+  }
+}
+
+message MetadataGroup {
+    oneof payload {
+    NdArray tensor = 1;
+  }
+}
+
+message Qube {
+  string key          = 1;
+  ValueGroup values   = 2;
+  map<string, MetadataGroup> metadata = 3;
+  string dtype        = 4;
+  repeated Qube children = 5;
+  bool is_root        = 6;
+}
--- a/src/rust/compressed_tree.rs
+++ b/src/rust/compressed_tree.rs
@ -1,334 +0,0 @@
-#![allow(dead_code)]
-
-use std::rc::Rc;
-use smallstr::SmallString;
-
-use slotmap::{new_key_type, SlotMap};
-
-new_key_type! {
-    struct NodeId;
-}
-
-type CompactString = SmallString<[u8; 16]>;
-
-#[derive(Clone)]
-enum NodeValueTypes {
-    String(CompactString),
-    Int(i32),
-}
-
-impl From<&str> for NodeValueTypes {
-    fn from(s: &str) -> Self {
-        NodeValueTypes::String(CompactString::from(s))
-    }
-}
-
-impl From<i32> for NodeValueTypes {
-    fn from(i: i32) -> Self {
-        NodeValueTypes::Int(i)
-    }
-}
-
-enum NodeValue {
-    Single(NodeValueTypes),
-    Multiple(Vec<NodeValueTypes>),
-}
-
-struct Node<Payload> {
-    key: Rc<String>,
-    value: NodeValue,
-    parent: Option<NodeId>,
-    prev_sibling: Option<NodeId>,
-    next_sibling: Option<NodeId>,
-    // vector may be faster for traversal, but linkedlist should be faster for insertion
-    children: Option<(NodeId, NodeId)>, // (first_child, last_child)
-    data: Option<Payload>,
-}
-
-struct QueryTree<Payload> {
-    nodes: SlotMap<NodeId, Node<Payload>>,
-}
-
-impl<Payload> QueryTree<Payload> {
-    fn new() -> Self {
-        QueryTree {
-            nodes: SlotMap::with_key(),
-        }
-    }
-
-    // Adds a node with a key and single value
-    fn add_node<S>(&mut self, key: &Rc<String>, value: S, parent: Option<NodeId>) -> NodeId
-    where
-        S: Into<NodeValueTypes>,
-    {
-        let node_id = self.nodes.insert_with_key(|_| Node {
-            key: Rc::clone(key),
-            value: NodeValue::Single(value.into()),
-            parent,
-            prev_sibling: None,
-            next_sibling: None,
-            children: None,
-            data: None,
-        });
-
-        if let Some(parent_id) = parent {
-            // Determine if parent has existing children
-            if let Some((first_child_id, last_child_id)) = self.nodes[parent_id].children {
-                // Update the last child's `next_sibling`
-                {
-                    let last_child = &mut self.nodes[last_child_id];
-                    last_child.next_sibling = Some(node_id);
-                }
-
-                // Update the new node's `prev_sibling`
-                {
-                    let new_node = &mut self.nodes[node_id];
-                    new_node.prev_sibling = Some(last_child_id);
-                }
-
-                // Update parent's last child
-                let parent_node = &mut self.nodes[parent_id];
-                parent_node.children = Some((first_child_id, node_id));
-            } else {
-                // No existing children
-                let parent_node = &mut self.nodes[parent_id];
-                parent_node.children = Some((node_id, node_id));
-            }
-        }
-
-        node_id
-    }
-
-    // Add a single value to a node
-    fn add_value<S>(&mut self, node_id: NodeId, value: S)
-    where
-        S: Into<NodeValueTypes>,
-    {
-        if let Some(node) = self.nodes.get_mut(node_id) {
-            match &mut node.value {
-                NodeValue::Single(v) => {
-                    let values = vec![v.clone(), value.into()];
-                    node.value = NodeValue::Multiple(values);
-                }
-                NodeValue::Multiple(values) => {
-                    values.push(value.into());
-                }
-            }
-        }
-    }
-
-    // Add multiple values to a node
-    fn add_values<S>(&mut self, node_id: NodeId, values: Vec<S>)
-    where
-        S: Into<NodeValueTypes>,
-    {
-        if let Some(node) = self.nodes.get_mut(node_id) {
-            match &mut node.value {
-                NodeValue::Single(v) => {
-                    let mut new_values = vec![v.clone()];
-                    new_values.extend(values.into_iter().map(|v| v.into()));
-                    node.value = NodeValue::Multiple(new_values);
-                }
-                NodeValue::Multiple(existing_values) => {
-                    existing_values.extend(values.into_iter().map(|v| v.into()));
-                }
-            }
-        }
-    }
-
-    fn get_node(&self, node_id: NodeId) -> Option<&Node<Payload>> {
-        self.nodes.get(node_id)
-    }
-
-    // TODO: better if this returns an iterator?
-    fn get_children(&self, node_id: NodeId) -> Vec<NodeId> {
-        let mut children = Vec::new();
-
-        if let Some(node) = self.get_node(node_id) {
-            if let Some((first_child_id, _)) = node.children {
-                let mut current_id = Some(first_child_id);
-                while let Some(cid) = current_id {
-                    children.push(cid);
-                    current_id = self.nodes[cid].next_sibling;
-                }
-            }
-        }
-
-        children
-    }
-
-    fn remove_node(&mut self, node_id: NodeId) {
-        // Remove the node and update parent and siblings
-        if let Some(node) = self.nodes.remove(node_id) {
-            // Update parent's children
-            if let Some(parent_id) = node.parent {
-                let parent_node = self.nodes.get_mut(parent_id).unwrap();
-                if let Some((first_child_id, last_child_id)) = parent_node.children {
-                    if first_child_id == node_id && last_child_id == node_id {
-                        // Node was the only child
-                        parent_node.children = None;
-                    } else if first_child_id == node_id {
-                        // Node was the first child
-                        parent_node.children = Some((node.next_sibling.unwrap(), last_child_id));
-                    } else if last_child_id == node_id {
-                        // Node was the last child
-                        parent_node.children = Some((first_child_id, node.prev_sibling.unwrap()));
-                    }
-                }
-            }
-
-            // Update siblings
-            if let Some(prev_id) = node.prev_sibling {
-                self.nodes[prev_id].next_sibling = node.next_sibling;
-            }
-            if let Some(next_id) = node.next_sibling {
-                self.nodes[next_id].prev_sibling = node.prev_sibling;
-            }
-
-            // Recursively remove children
-            let children_ids = self.get_children(node_id);
-            for child_id in children_ids {
-                self.remove_node(child_id);
-            }
-        }
-    }
-
-    fn is_root(&self, node_id: NodeId) -> bool {
-        self.nodes[node_id].parent.is_none()
-    }
-
-    fn is_leaf(&self, node_id: NodeId) -> bool {
-        self.nodes[node_id].children.is_none()
-    }
-
-    fn add_payload(&mut self, node_id: NodeId, payload: Payload) {
-        if let Some(node) = self.nodes.get_mut(node_id) {
-            node.data = Some(payload);
-        }
-    }
-
-    fn print_tree(&self) {
-        // Find all root nodes (nodes without a parent)
-        let roots: Vec<NodeId> = self
-            .nodes
-            .iter()
-            .filter_map(|(id, node)| {
-                if node.parent.is_none() {
-                    Some(id)
-                } else {
-                    None
-                }
-            })
-            .collect();
-
-        // Iterate through each root node and print its subtree
-        for (i, root_id) in roots.iter().enumerate() {
-            let is_last = i == roots.len() - 1;
-            self.print_node(*root_id, String::new(), is_last);
-        }
-    }
-
-    /// Recursively prints a node and its children.
-    ///
-    /// - `node_id`: The current node's ID.
-    /// - `prefix`: The string prefix for indentation and branch lines.
-    /// - `is_last`: Boolean indicating if the node is the last child of its parent.
-    fn print_node(&self, node_id: NodeId, prefix: String, is_last: bool) {
-        // Retrieve the current node
-        let node = match self.nodes.get(node_id) {
-            Some(n) => n,
-            None => return, // Node not found; skip
-        };
-
-        // Determine the branch character
-        let branch = if prefix.is_empty() {
-            "" // Root node doesn't have a branch
-        } else if is_last {
-            "└── " // Last child
-        } else {
-            "├── " // Middle child
-        };
-
-        // Print the current node's key and values
-        print!("{}{}{}", prefix, branch, node.key);
-        match &node.value {
-            NodeValue::Single(v) => match v {
-                NodeValueTypes::String(s) => println!(": ({})", s),
-                NodeValueTypes::Int(i) => println!(": ({})", i),
-            },
-            NodeValue::Multiple(vs) => {
-                let values: Vec<String> = vs
-                    .iter()
-                    .map(|v| match v {
-                        NodeValueTypes::String(s) => s.to_string(),
-                        NodeValueTypes::Int(i) => i.to_string(),
-                    })
-                    .collect();
-                println!(": ({})", values.join(", "));
-            }
-        }
-
-        // Prepare the prefix for child nodes
-        let new_prefix = if prefix.is_empty() {
-            if is_last {
-                "    ".to_string()
-            } else {
-                "│   ".to_string()
-            }
-        } else {
-            if is_last {
-                format!("{}    ", prefix)
-            } else {
-                format!("{}│   ", prefix)
-            }
-        };
-
-        // Retrieve and iterate through child nodes
-        if let Some((_first_child_id, _last_child_id)) = node.children {
-            let children = self.get_children(node_id);
-            let total = children.len();
-            for (i, child_id) in children.iter().enumerate() {
-                let child_is_last = i == total - 1;
-                self.print_node(*child_id, new_prefix.clone(), child_is_last);
-            }
-        }
-    }
-}
-
-fn main() {
-    let mut tree: QueryTree<i16> = QueryTree::new();
-
-    let value = "hello";
-    let axis = Rc::new("foo".to_string());
-
-    let root_id = tree.add_node(&axis, value, None);
-
-    use std::time::Instant;
-    let now = Instant::now();
-
-    for _ in 0..100 {
-        // let child_value = format!("child_val{}", i);
-        let child_id = tree.add_node(&axis, value, Some(root_id));
-        // tree.add_value(child_id, value);
-
-        for _ in 0..100 {
-            // let gchild_value = format!("gchild_val{}", j);
-            let gchild_id = tree.add_node(&axis, value, Some(child_id));
-            // tree.add_values(gchild_id, vec![1, 2]);
-
-            for _ in 0..1000 {
-                // let ggchild_value = format!("ggchild_val{}", k);
-                let _ggchild_id = tree.add_node(&axis, value, Some(gchild_id));
-                // tree.add_value(_ggchild_id, value);
-                // tree.add_values(_ggchild_id, vec![1, 2, 3, 4]);
-            }
-        }
-    }
-
-    assert_eq!(tree.nodes.len(), 10_010_101);
-
-    let elapsed = now.elapsed();
-    println!("Elapsed: {:.2?}", elapsed);
-
-    // tree.print_tree();
-}
--- a/src/rust/connectors/fdb.rs
+++ b/src/rust/connectors/fdb.rs
@ -0,0 +1,76 @@
+use rsfdb::listiterator::KeyValueLevel;
+use rsfdb::request::Request;
+use rsfdb::FDB;
+
+use serde_json::{json, Value};
+use std::time::Instant;
+
+
+use std::collections::HashMap;
+
+pub mod tree;
+use std::sync::Arc;
+use std::sync::Mutex;
+use tree::TreeNode;
+
+#[pyclass(unsendable)]
+pub struct PyFDB {
+    pub fdb: FDB,
+}
+
+#[pymethods]
+impl PyFDB {
+    /// Create a new handle, optionally from the given FDB configuration string.
+    /// Any error from `FDB::new` is surfaced to Python as a RuntimeError.
+    #[new]
+    #[pyo3(signature = (fdb_config=None))]
+    pub fn new(fdb_config: Option<&str>) -> PyResult<Self> {
+        let fdb = FDB::new(fdb_config)
+            .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(e.to_string()))?;
+        Ok(PyFDB { fdb })
+    }
+
+    /// Traverse the FDB with the given request.
+    ///
+    /// The request is converted to JSON, listed through the FDB, and every
+    /// listed item that carries a request is inserted into a tree rooted at a
+    /// synthetic "root" node. The tree is returned via `to_py_dict`.
+    /// A request that cannot be parsed raises ValueError; a failing list call
+    /// raises RuntimeError.
+    pub fn traverse_fdb(
+        &self,
+        py: Python<'_>,
+        request: HashMap<String, Vec<String>>,
+    ) -> PyResult<PyObject> {
+        let start_time = Instant::now();
+
+        let list_request = Request::from_json(json!(request))
+            .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))?;
+
+        // List directly through `self.fdb`; no lock or guard is involved.
+        // NOTE(review): the meaning of the two `true` flags is defined by rsfdb - confirm.
+        let list = self
+            .fdb
+            .list(&list_request, true, true)
+            .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(e.to_string()))?;
+
+        let mut root = TreeNode::new(KeyValueLevel {
+            key: "root".to_string(),
+            value: "root".to_string(),
+            level: 0,
+        });
+
+        for item in list {
+            // Let pending Python signals (e.g. Ctrl-C) abort the traversal.
+            py.check_signals()?;
+
+            if let Some(request) = &item.request {
+                root.insert(&request);
+            }
+        }
+
+        let duration = start_time.elapsed();
+        println!("Total runtime: {:?}", duration);
+
+        let py_dict = root.to_py_dict(py)?;
+        Ok(py_dict)
+    }
+}
+
+use pyo3::prelude::*;
+
+#[pymodule]
+fn rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_class::<PyFDB>()?;
+    Ok(())
+}
--- a/src/rust/formatters/mod.rs
+++ b/src/rust/formatters/mod.rs
@ -0,0 +1,147 @@
+use crate::{Node, NodeId, Qube};
+use itertools::Itertools;
+use itertools::Position;
+
+impl Node {
+    /// Generate a human readable summary of the node
+    /// Examples include: key=value1/value2/.../valueN, key=value1/to/value1, key=*, root etc
+    pub fn summary(&self, qube: &Qube) -> String {
+        if self.is_root() {
+            return "root".to_string();
+        }
+        // The key and each value are ids that are resolved by indexing into the qube.
+        let key = &qube[self.key];
+        let values: String =
+            Itertools::intersperse(self.values.iter().map(|id| &qube[*id]), "/").collect();
+
+        format!("{}={}", key, values)
+    }
+
+    /// Generate the same key=value1/value2 summary wrapped in a
+    /// `<span class="qubed-node">`, with the summary repeated in `data-path`
+    /// and an `is_root` note in `title`.
+    /// NOTE(review): key and values are interpolated without HTML escaping - confirm they are trusted.
+    pub fn html_summary(&self, qube: &Qube) -> String {
+        if self.is_root() {
+            return r#"<span class="qubed-node">root</span>"#.to_string();
+        }
+        let key = &qube[self.key];
+        let values: String =
+            Itertools::intersperse(self.values.iter().map(|id| &qube[*id]), "/").collect();
+
+        let summary = format!("{}={}", key, values);
+        let path = summary.clone();
+        // The root case returned above, so this reports the value of is_root() for a non-root node.
+        let info = format!("is_root: {}", self.is_root());
+        format!(r#"<span class="qubed-node" data-path="{path}" title="{info}">{summary}</span>"#)
+    }
+}
+
+struct NodeSummary {
+    summary: String,
+    end: NodeId,
+}
+
+enum SummaryType {
+    PlainText,
+    HTML,
+}
+
+/// Given a Node, follow the chain of single children until reaching a node
+/// that does not have exactly one child.
+/// Returns a summary of the form "key1=v1/v2, key2=v1/v2/v3, key3=v1"
+/// and the id of the last node in the summary
+fn summarise_nodes(qube: &Qube, node_id: &NodeId, summary_type: SummaryType) -> NodeSummary {
+    let mut node_id = *node_id;
+    let mut summary_vec = vec![];
+    loop {
+        let node = &qube[node_id];
+        let summary = match summary_type {
+            SummaryType::PlainText => node.summary(&qube),
+            SummaryType::HTML => node.html_summary(&qube),
+        };
+        summary_vec.push(summary);
+
+        // Bail out if the node has anything other than exactly 1 child.
+        match node.has_exactly_one_child() {
+            Some(n) => node_id = n,
+            None => break,
+        };
+    }
+    NodeSummary {
+        summary: summary_vec.join(", "),
+        end: node_id,
+    }
+}
+
+/// Render the subtree starting at `node_id` as indented plain text.
+///
+/// The chain of single children starting at `node_id` is collapsed onto one
+/// line; each child of the chain's last node is then rendered recursively
+/// behind a box-drawing connector. `prefix` is the indentation accumulated so
+/// far and `depth` is the number of further levels to expand: at depth 0 only
+/// the summary line followed by " - ..." is returned.
+fn qube_to_tree(qube: &Qube, node_id: &NodeId, prefix: &str, depth: usize) -> String {
+    let NodeSummary {
+        summary,
+        end: node_id,
+    } = summarise_nodes(qube, node_id, SummaryType::PlainText);
+
+    let mut output: Vec<String> = Vec::new();
+
+    // Returning here is also what keeps `depth - 1` below from underflowing.
+    if depth <= 0 {
+        return format!("{} - ...\n", summary);
+    } else {
+        output.push(format!("{}\n", summary));
+    }
+
+    let node = &qube[node_id];
+    for (position, child_id) in node.children().with_position() {
+        // The last child closes the branch; earlier ones keep a vertical rule
+        // running down past their own subtree.
+        let (connector, extension) = match position {
+            Position::Last | Position::Only => ("└── ", "    "),
+            _ => ("├── ", "│   "),
+        };
+        output.extend([
+            prefix.to_string(),
+            connector.to_string(),
+            qube_to_tree(qube, child_id, &format!("{prefix}{extension}"), depth - 1),
+        ]);
+    }
+
+    output.join("")
+}
+
+/// Render the subtree starting at `node_id` as nested `<details>` elements.
+///
+/// The chain of single children starting at `node_id` is collapsed into one
+/// `<summary>` built from the per-node HTML spans; each child of the chain's
+/// last node is rendered recursively behind a box-drawing connector. `prefix`
+/// is the indentation accumulated so far. `depth` is the number of levels that
+/// start expanded: elements at depth 0 and below are emitted closed.
+fn qube_to_html(qube: &Qube, node_id: &NodeId, prefix: &str, depth: usize) -> String {
+    // Use the HTML summaries so that each key=value is wrapped in its span.
+    let NodeSummary {
+        summary,
+        end: node_id,
+    } = summarise_nodes(qube, node_id, SummaryType::HTML);
+
+    let node = &qube[node_id];
+    let mut output: Vec<String> = Vec::new();
+
+    let open = if depth > 0 { "open" } else { "" };
+    output.push(format!(
+        r#"<details {open}><summary class="qubed-level">{summary}</summary>"#
+    ));
+
+    for (position, child_id) in node.children().with_position() {
+        let (connector, extension) = match position {
+            Position::Last | Position::Only => ("└── ", "    "),
+            _ => ("├── ", "│   "),
+        };
+        output.extend([
+            prefix.to_string(),
+            connector.to_string(),
+            // Recurse into the HTML renderer (not the plain-text one) so the
+            // children are details elements too. saturating_sub keeps the
+            // unsigned depth from underflowing once it has reached 0.
+            qube_to_html(
+                qube,
+                child_id,
+                &format!("{prefix}{extension}"),
+                depth.saturating_sub(1),
+            ),
+        ]);
+    }
+
+    // Close the element opened above so that siblings do not nest inside it.
+    output.push("</details>".to_string());
+
+    output.join("")
+}
+
+impl Qube {
+    /// Return a string version of the Qube in the format
+    /// root
+    /// ├── class=od, expver=0001/0002, param=1/2
+    /// └── class=rd, param=1/2/3
+    pub fn string_tree(&self) -> String {
+        qube_to_tree(&self, &self.root, "", 5)
+    }
+
+    /// Return an HTML version of the Qube which renders like this
+    /// root
+    /// ├── class=od, expver=0001/0002, param=1/2
+    /// └── class=rd, param=1/2/3
+    /// But under the hood children are represented with a details/summary tag and each key=value is a span
+    /// CSS and JS functionality is bundled inside.
+    pub fn html_tree(&self) -> String {
+        qube_to_html(&self, &self.root, "", 5)
+    }
+}
--- a/src/rust/lib.rs
+++ b/src/rust/lib.rs
@ -1,82 +1,235 @@
-// #![allow(unused_imports)]
-// #![allow(dead_code)]
-// #![allow(unused_variables)]
+#![allow(unused_imports)]

-// use rsfdb::listiterator::KeyValueLevel;
-// use rsfdb::request::Request;
-// use rsfdb::FDB;
+use pyo3::prelude::*;
+use pyo3::wrap_pyfunction;
+use pyo3::types::{PyDict, PyInt, PyList, PyString};
+use python_interface::QubeError;
+use std::collections::HashMap;
+use std::iter;
+use pyo3::prelude::*;
+use std::hash::Hash;
+use std::rc::Rc;

-// use serde_json::{json, Value};
-// use std::time::Instant;
+use lasso::{Rodeo, Spur};
+use std::num::NonZero;
+use std::ops;

-// use pyo3::prelude::*;
-// use pyo3::types::{PyDict, PyInt, PyList, PyString};
+mod serialisation;
+mod python_interface;
+mod formatters;
+mod set_operations;

-// use std::collections::HashMap;
+// This data structure uses the Newtype Index Pattern
+// See https://matklad.github.io/2018/06/04/newtype-index-pattern.html
+// See also https://github.com/nrc/r4cppp/blob/master/graphs/README.md#rcrefcellnode for a discussion of other approaches to trees and graphs in rust.
+// https://smallcultfollowing.com/babysteps/blog/2015/04/06/modeling-graphs-in-rust-using-vector-indices/

-// pub mod tree;
-// use std::sync::Arc;
-// use std::sync::Mutex;
-// use tree::TreeNode;
+// Index types use struct Id(NonZero<usize>)
+// This reserves 0 as a special value which allows Option<Id(NonZero<usize>)> to be the same size as usize.

-// #[pyclass(unsendable)]
-// pub struct PyFDB {
-//     pub fdb: FDB,
-// }
+#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
+pub(crate) struct NodeId(NonZero<usize>); // 1-based index of a node in a Qube; indexing subtracts 1 to find the Vec slot

-// #[pymethods]
-// impl PyFDB {
-//     #[new]
-//     #[pyo3(signature = (fdb_config=None))]
-//     pub fn new(fdb_config: Option<&str>) -> PyResult<Self> {
-//         let fdb = FDB::new(fdb_config)
-//             .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(e.to_string()))?;
-//         Ok(PyFDB { fdb })
-//     }
+// Allow node indices to index directly into Qubes:
+impl ops::Index<NodeId> for Qube {
+    type Output = Node;

-//     /// Traverse the FDB with the given request.
-//     pub fn traverse_fdb(
-//         &self,
-//         py: Python<'_>,
-//         request: HashMap<String, Vec<String>>,
-//     ) -> PyResult<PyObject> {
-//         let start_time = Instant::now();
+    fn index(&self, index: NodeId) -> &Node {
+        // Node ids start at 1, the backing Vec at 0.
+        let slot = index.0.get() - 1;
+        &self.nodes[slot]
+    }
+}

-//         let list_request = Request::from_json(json!(request))
-//             .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))?;
+// Mutable counterpart of indexing a Qube by node id.
+impl ops::IndexMut<NodeId> for Qube {
+    fn index_mut(&mut self, index: NodeId) -> &mut Node {
+        // Node ids start at 1, the backing Vec at 0.
+        let slot = index.0.get() - 1;
+        &mut self.nodes[slot]
+    }
+}

-//         // Use `fdb_guard` instead of `self.fdb`
-//         let list = self
-//             .fdb
-//             .list(&list_request, true, true)
-//             .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(e.to_string()))?;
+impl ops::Index<StringId> for Qube {
+    type Output = str;

-//         let mut root = TreeNode::new(KeyValueLevel {
-//             key: "root".to_string(),
-//             value: "root".to_string(),
-//             level: 0,
-//         });
+    fn index(&self, index: StringId) -> &str {
+        // Delegate to the interner, which is itself indexable by StringId.
+        let interner = &self.strings;
+        &interner[index]
+    }
+}

-//         for item in list {
-//             py.check_signals()?;
+impl NodeId {
+    /// Wrap `value` as a node id; returns `None` when `value` is zero,
+    /// because zero is the reserved niche of `NonZero`.
+    pub fn new(value: usize) -> Option<NodeId> {
+        let non_zero = NonZero::new(value)?;
+        Some(NodeId(non_zero))
+    }
+}

-//             if let Some(request) = &item.request {
-//                 root.insert(&request);
-//             }
-//         }
+#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
+struct StringId(lasso::Spur); // newtype over the interner key; resolve it by indexing a Qube or a Rodeo

-//         let duration = start_time.elapsed();
-//         println!("Total runtime: {:?}", duration);
+impl ops::Index<StringId> for lasso::Rodeo {
+    type Output = str;

-//         let py_dict = root.to_py_dict(py)?;
-//         Ok(py_dict)
-//     }
-// }
+    fn index(&self, index: StringId) -> &str {
+        // Unwrap the newtype and use the interner's own indexing by key.
+        let StringId(spur) = index;
+        &self[spur]
+    }
+}

-// use pyo3::prelude::*;
+#[derive(Debug, Clone)]
+pub(crate) struct Node {
+    pub key: StringId, // interned key name; the root node uses "root"
+    pub metadata: HashMap<StringId, Vec<String>>,
+    pub parent: Option<NodeId>, // If not present, it's the root node
+    pub values: Vec<StringId>, // interned value strings attached to `key`
+    pub children: HashMap<StringId, Vec<NodeId>>, // child node ids, grouped by the child's key
+}

-// #[pymodule]
-// fn rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
-//     m.add_class::<PyFDB>()?;
-//     Ok(())
-// }
+impl Node {
+    /// Build a parentless node keyed "root", interning the key in `q`.
+    /// The node is returned, not inserted into `q`.
+    fn new_root(q: &mut Qube) -> Node {
+        let key = q.get_or_intern("root");
+        Node {
+            key,
+            metadata: HashMap::new(),
+            parent: None,
+            values: Vec::new(),
+            children: HashMap::new(),
+        }
+    }
+
+    /// Iterate over the ids of all children, across every key group.
+    fn children(&self) -> impl Iterator<Item = &NodeId> {
+        self.children.values().flat_map(|group| group.iter())
+    }
+
+    /// A node without a parent is the root.
+    fn is_root(&self) -> bool {
+        matches!(self.parent, None)
+    }
+
+    /// Children are stored grouped by key, so counting them means walking
+    /// the groups. This is the cheap special case: is there exactly one child?
+    /// Returns Some(NodeId) of that child, otherwise None.
+    fn has_exactly_one_child(&self) -> Option<NodeId> {
+        let mut groups = self.children.values();
+        match (groups.next(), groups.next()) {
+            // Exactly one key group: it must hold exactly one id.
+            (Some(only_group), None) => match only_group.as_slice() {
+                [only_child] => Some(*only_child),
+                _ => None,
+            },
+            _ => None,
+        }
+    }
+
+    /// Total number of children, summed over all key groups.
+    fn n_children(&self) -> usize {
+        self.children.values().map(Vec::len).sum()
+    }
+
+    /// Iterate over the distinct keys of this node's children, resolved to
+    /// strings through `q`.
+    fn keys<'a>(&'a self, q: &'a Qube) -> impl Iterator<Item = &'a str> {
+        self.children.keys().map(move |key_id| &q[*key_id])
+    }
+}
+
+#[derive(Debug, Clone)]
+#[pyclass(subclass, dict)]
+pub struct Qube {
+    pub root: NodeId, // id of the current root node; Qube::new sets it to 1
+    nodes: Vec<Node>, // arena holding every node; NodeId n is stored at nodes[n - 1]
+    strings: Rodeo, // interner that backs every StringId
+}
+
+impl Qube {
+    /// Create a Qube containing only a root node, stored with id 1.
+    pub fn new() -> Self {
+        let mut qube = Qube {
+            root: NodeId::new(1).unwrap(),
+            nodes: Vec::new(),
+            strings: Rodeo::default(),
+        };
+
+        let root_node = Node::new_root(&mut qube);
+        qube.nodes.push(root_node);
+        qube
+    }
+
+    /// Intern `val` (or look it up if already interned) and return its id.
+    fn get_or_intern(&mut self, val: &str) -> StringId {
+        let spur = self.strings.get_or_intern(val);
+        StringId(spur)
+    }
+
+    /// Append a new node with the given key and values under `parent` and
+    /// return the id of the new node.
+    pub(crate) fn add_node(&mut self, parent: NodeId, key: &str, values: impl IntoIterator<Item = impl AsRef<str>>) -> NodeId {
+        let key_id = self.get_or_intern(key);
+        let mut value_ids = Vec::new();
+        for value in values {
+            value_ids.push(self.get_or_intern(value.as_ref()));
+        }
+
+        // Store the node in the arena; its id is the arena length after the push
+        // because ids are 1-based.
+        self.nodes.push(Node {
+            key: key_id,
+            metadata: HashMap::new(),
+            values: value_ids,
+            parent: Some(parent),
+            children: HashMap::new(),
+        });
+        let node_id = NodeId::new(self.nodes.len()).unwrap();
+
+        // Register the new id in the parent's list of children for this key.
+        self[parent].children.entry(key_id).or_default().push(node_id);
+
+        node_id
+    }
+
+    /// Summarise `node_id`, or the root node when `None` is given.
+    fn print(&self, node_id: Option<NodeId>) -> String {
+        let target = node_id.unwrap_or(self.root);
+        self[target].summary(self)
+    }
+
+    /// Bundle a node id with borrows of its node and of this Qube.
+    fn get_node_ref(&self, id: NodeId) -> NodeRef {
+        NodeRef { id, node: &self[id], qube: self }
+    }
+
+    /// Look up the id of an already-interned string without interning it.
+    pub fn get_string_id(&self, s: &str) -> Option<StringId> {
+        self.strings.get(s).map(StringId)
+    }
+}
+
+
+// Python module definition: exposes the `Qube` class and the `QubeError` exception.
+#[pymodule]
+fn rust(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_class::<Qube>()?;
+    let qube_error = py.get_type::<QubeError>();
+    m.add("QubeError", qube_error)?;
+    Ok(())
+}
+
+
+pub struct NodeRef<'a> { // a node id bundled with borrows of its node and its Qube
+    pub id: NodeId, // id of the referenced node
+    pub node: &'a Node, // the node that `id` refers to
+    pub qube: &'a Qube, // the Qube used to resolve child ids and interned strings
+}
+
+impl<'a> NodeRef<'a> {
+    /// Iterate over the distinct keys of this node's children as strings.
+    pub fn keys(&self) -> impl Iterator<Item = &str> {
+        self.node.keys(self.qube)
+    }
+
+    /// Iterate over every child of this node, regardless of key.
+    fn flat_children(&'a self) -> impl Iterator<Item = Self> {
+        self.node
+            .children
+            .values()
+            .flat_map(|group| group.iter())
+            .map(|child_id| NodeRef {
+                id: *child_id,
+                node: &self.qube[*child_id],
+                qube: self.qube,
+            })
+    }
+
+    /// Iterate over the children stored under `key`. Yields nothing when the
+    /// key was never interned or this node has no children with that key.
+    fn children_by_key(&'a self, key: &str) -> impl Iterator<Item = Self> {
+        self.qube
+            .get_string_id(key)
+            .and_then(|key_id| self.node.children.get(&key_id))
+            .into_iter()
+            .flatten()
+            .map(|child_id| NodeRef {
+                id: *child_id,
+                node: &self.qube[*child_id],
+                qube: self.qube,
+            })
+    }
+}
--- a/src/rust/python_interface.rs
+++ b/src/rust/python_interface.rs
@ -0,0 +1,179 @@
+use crate::{Node, NodeId, Qube, NodeRef};
+use pyo3::prelude::*;
+use pyo3::types::{PyList, PyType};
+use core::borrow;
+use std::ops::Deref;
+use std::cell::Ref;
+
+use crate::set_operations;
+use crate::serialisation;
+use itertools::Itertools;
+
+use pyo3::create_exception;
+
+create_exception!(qubed, QubeError, pyo3::exceptions::PyException);
+
+/// A reference to a particular node in a Qube
+#[pyclass]
+pub struct PyNodeRef {
+    id: NodeId, // id of the node within `qube`
+    qube: Py<Qube>, // see https://pyo3.rs/v0.23.1/types for a discussion of Py<T> and Bound<'py, T>
+}
+
+/// Turn a borrowed `NodeRef` into a Python-facing `PyNodeRef` that holds the
+/// given `Py<Qube>` handle; only the node id is taken from `node_ref`.
+fn into_py_node_ref(node_ref: NodeRef, qube: Py<Qube>) -> PyNodeRef {
+    let NodeRef { id, .. } = node_ref;
+    PyNodeRef { id, qube }
+}
+
+#[pymethods]
+impl PyNodeRef {
+    fn __repr__(&self, py: Python) -> PyResult<String> {
+        // Recursively format a node as `Node(key, child, child, ...)`.
+        fn repr_helper<'py>(node_id: NodeId, qube: &Bound<'py, Qube>) -> String {
+            let guard = qube.borrow();
+            let node = &guard[node_id];
+            let key = &guard[node.key];
+
+            let mut rendered_children: Vec<String> = Vec::new();
+            for child_id in node.children.values().flatten() {
+                rendered_children.push(repr_helper(*child_id, qube));
+            }
+
+            format!("Node({}, {})", key, rendered_children.join(", "))
+        }
+
+        // Bind the stored Py<Qube> handle to the GIL before borrowing from it.
+        let bound_qube = self.qube.bind(py);
+        Ok(repr_helper(self.id, bound_qube))
+    }
+
+    fn __str__(&self, py: Python) -> String {
+        let qube = self.qube.bind(py).borrow();
+        let key_id = qube[self.id].key;
+        format!("Node({})", &qube.strings[key_id])
+    }
+
+    #[getter]
+    pub fn get_children(&self, py: Python) -> Vec<Self> {
+        let qube = self.qube.bind(py).borrow();
+        let mut children = Vec::new();
+        // Each child shares the same underlying Qube handle as this node.
+        for child_id in qube[self.id].children.values().flatten() {
+            children.push(Self {
+                id: *child_id,
+                qube: self.qube.clone_ref(py),
+            });
+        }
+        children
+    }
+}
+
+#[derive(FromPyObject)]
+pub enum OneOrMany<T> { // argument type that accepts either a single T or a list of T from Python
+    One(T), // a single value
+    Many(Vec<T>), // several values
+}
+
+// Todo: Is there a way to rewrite this so that it doesn't allocate?
+// Perhaps by returning an iterator?
+//
+// Implemented as `From` rather than `Into`: the standard library's blanket
+// impl then provides `Into<Vec<T>> for OneOrMany<T>` automatically, so
+// existing `.into()` call sites keep working and `Vec::from(..)` works too.
+impl<T> From<OneOrMany<T>> for Vec<T> {
+    /// Collapse a `OneOrMany` into a `Vec`: a single value becomes a
+    /// one-element vector, many values are returned as they are.
+    fn from(value: OneOrMany<T>) -> Self {
+        match value {
+            OneOrMany::One(v) => vec![v],
+            OneOrMany::Many(vs) => vs,
+        }
+    }
+}
+
+#[pymethods]
+impl Qube {
+    /// Create an empty Qube that contains only a root node.
+    #[new]
+    pub fn py_new() -> Self {
+        Qube::new()
+    }
+
+    /// Add a node with the given key and value(s) under `parent` and return a
+    /// reference to it. Raises QubeError if `parent` belongs to another qube.
+    #[pyo3(name = "add_node")]
+    pub fn py_add_node(
+        slf: Bound<'_, Self>,
+        parent: PyRef<'_, PyNodeRef>,
+        key: &str,
+        values: OneOrMany<String>,
+    ) -> PyResult<PyNodeRef> {
+        // Check that the given parent is actually in this qube and not another one
+        if !parent.qube.bind(slf.py()).is(&slf) {
+            return Err(QubeError::new_err("Supplied parent node is not in the target qube."))
+        }
+
+        // massage values from T | Vec<T> into Vec<T>
+        let values: Vec<String> = values.into();
+        let mut q = slf.borrow_mut();
+        let node_id = q.add_node(parent.id, key, &values);
+        Ok(PyNodeRef { id: node_id, qube: slf.into()})
+    }
+
+    /// Make `node` the root of this qube.
+    /// Raises QubeError if `node` belongs to another qube.
+    pub fn set_root(
+        slf: Bound<'_, Self>,
+        node: PyRef<'_, PyNodeRef>,
+    ) -> PyResult<()> {
+        // Same ownership check as add_node: an id taken from a different qube
+        // may not exist in this arena and would break later indexing.
+        if !node.qube.bind(slf.py()).is(&slf) {
+            return Err(QubeError::new_err("Supplied node is not in the target qube."))
+        }
+
+        let mut q = slf.borrow_mut();
+        q.root = node.id;
+        Ok(())
+    }
+
+    /// Reference to the current root node.
+    #[getter]
+    fn get_root(slf: Bound<'_, Self>) -> PyResult<PyNodeRef> {
+        Ok(PyNodeRef {
+            id: slf.borrow().root,
+            qube: slf.unbind(),
+        })
+    }
+
+    fn __repr__(&self) -> String {
+        // format!("{:?}", self)
+        // List every node in arena order; ids are 1-based, hence `id+1`.
+        let nodes_str: String = self.nodes.iter()
+        .enumerate()
+        .map(|(id, node)| {
+            format!("{{id: {}, key: {}, values: [{}], children: [{}]}}",
+            id+1,
+            &self[node.key],
+            node.values.iter().map(|s| &self[*s]).join(", "),
+            node.children().map(|n| n.0).join(", "),
+        )
+        }).join(", ");
+        format!("Qube {{root: {}, nodes: {}}}", self.root.0, nodes_str)
+    }
+
+    fn __str__(&self) -> String {
+        self.string_tree()
+    }
+
+    fn _repr_html_(&self) -> String {
+        self.html_tree()
+    }
+
+    /// Return the summary string of the root node.
+    #[pyo3(name = "print")]
+    fn py_print(&self) -> String {
+        self.print(Option::None)
+    }
+
+    /// References to the children of the root node.
+    #[getter]
+    pub fn get_children(slf: Bound<'_, Self>, py: Python) -> PyResult<Vec<PyNodeRef>> {
+        let root = PyNodeRef {
+            id: slf.borrow().root,
+            qube: slf.unbind(),
+        };
+        Ok(root.get_children(py))
+    }
+
+    /// Build a Qube from a JSON string.
+    #[staticmethod]
+    pub fn from_json(data: &str) -> Result<Self, serialisation::JSONError> {
+        serialisation::from_json(data)
+    }
+
+    /// Union of two qubes (`a | b`); delegates to `set_operations::set_operation`.
+    pub fn __or__(slf: Bound<'_, Self>, other: Bound<'_, Qube>) -> Qube {
+        set_operations::set_operation(&slf.borrow(), &other.borrow(), set_operations::Op::Union)
+    }
+}
--- a/src/rust/serialisation/json.rs
+++ b/src/rust/serialisation/json.rs
@ -0,0 +1,80 @@
+use pyo3::exceptions::PyValueError;
+use pyo3::prelude::*;
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use std::collections::HashMap;
+
+use crate::{Node, NodeId, Qube};
+
+// Use a newtype wrapper to allow us to implement auto conversion from serde_json::Error to PyErr
+// via a wrapper intermediate
+// see https://pyo3.rs/main/function/error-handling.html#foreign-rust-error-types
+pub struct JSONError(serde_json::Error);
+
+// Surface JSON errors to Python as ValueError carrying the serde_json message.
+impl From<JSONError> for PyErr {
+    fn from(error: JSONError) -> Self {
+        let message = error.0.to_string();
+        PyValueError::new_err(message)
+    }
+}
+
+impl From<serde_json::Error> for JSONError {
+    fn from(other: serde_json::Error) -> Self {
+        Self(other)
+    }
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(tag = "dtype")] // NOTE(review): unlike `Enum` there is no rename_all = "lowercase" here, so the tag value is presumably "Int64" — confirm against the JSON format
+enum Ranges {
+    Int64{values: Vec<(i64, i64)>} // list of (i64, i64) pairs; not consumed yet (add_nodes hits todo!() for ranges)
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(tag = "dtype", rename_all = "lowercase")] // variant is selected by the "dtype" field, lowercased
+enum Enum {
+    Str{values: Vec<String>} // explicit list of string values
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(tag = "type", rename_all = "lowercase")] // variant is selected by the "type" field, lowercased
+enum Values {
+    Wildcard{}, // add_nodes stores this as the single value "*"
+    Enum(Enum), // explicit values; add_nodes stores each string
+    Range(Ranges) // not supported by add_nodes yet (todo!())
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+struct JSONQube { // one node of the JSON tree, with its children nested inline
+    key: String, // key name of this node
+    values: Values, // values attached to `key`
+    metadata: HashMap<String, String>, // parsed but not used by add_nodes
+    children: Vec<JSONQube>, // nested child nodes
+}
+
+/// Recursively insert `nodes` (and all their descendants) under `parent`.
+/// Returns the ids created for `nodes` themselves, in input order.
+/// Range values are not supported yet and hit `todo!()`.
+fn add_nodes(qube: &mut Qube, parent: NodeId, nodes: &[JSONQube]) -> Vec<NodeId> {
+    let mut created = Vec::with_capacity(nodes.len());
+
+    for json_node in nodes {
+        let values: Vec<&str> = match &json_node.values {
+            // A wildcard is stored as the single value "*".
+            Values::Wildcard {} => vec!["*"],
+            Values::Enum(Enum::Str { values }) => values.iter().map(String::as_str).collect(),
+            Values::Range(_) => todo!(),
+        };
+        let node_id = qube.add_node(parent, &json_node.key, values);
+
+        // Descend: the node just created is the parent of its JSON children.
+        add_nodes(qube, node_id, &json_node.children);
+        created.push(node_id);
+    }
+
+    created
+}
+
+/// Build a `Qube` from a JSON document.
+///
+/// The top-level JSON object stands for the root: only its `children` are
+/// inserted, under the root node of a fresh `Qube`.
+///
+/// Returns `Err(JSONError)` when `data` is not valid JSON or does not match
+/// the expected schema; through `From<JSONError> for PyErr` Python callers
+/// see a `ValueError` instead of a panic.
+pub fn from_json(data: &str) -> Result<Qube, JSONError> {
+    // `?` converts serde_json::Error into JSONError via the From impl.
+    let json_qube: JSONQube = serde_json::from_str(data)?;
+
+    let mut qube = Qube::new();
+    let root = qube.root;
+    add_nodes(&mut qube, root, &json_qube.children);
+    Ok(qube)
+}
--- a/src/rust/serialisation/mod.rs
+++ b/src/rust/serialisation/mod.rs
@ -0,0 +1,2 @@
+mod json;
+pub use json::{from_json, JSONError};
--- a/src/rust/set_operations.rs
+++ b/src/rust/set_operations.rs
@ -0,0 +1,40 @@
+use crate::NodeRef;
+use crate::{Node, NodeId, Qube};
+use itertools::chain;
+use std::collections::HashSet;
+
+pub enum Op { // set operations that can be requested between two qubes
+    Union,
+    Intersection,
+    Difference,
+    SymmetricDifference,
+}
+
+/// Map a set operation to three keep/drop flags.
+/// NOTE(review): the flags presumably mean (only in a, in both, only in b) —
+/// confirm once `set_operation` is implemented.
+fn op_to_venn_diagram(op: Op) -> (bool, bool, bool) {
+    match op {
+        Op::Union => (true, true, true),
+        Op::Intersection => (false, true, false),
+        Op::Difference => (true, false, false),
+        Op::SymmetricDifference => (true, false, true),
+    }
+}
+
+pub fn set_operation<'a>(a: &'a Qube, b: &'a Qube, op: Op) -> Qube { // meant to combine `a` and `b` according to `op` into a new Qube
+    todo!() // not implemented yet: every call panics, including Qube::__or__
+    // _set_operation(a.root_ref(), a.root_ref(), op)  -- NOTE(review): the second argument presumably should be b.root_ref()
+}
+
+// fn _set_operation<'a>(a: NodeRef, b: NodeRef, op: Op) -> Qube {
+//     let keys: HashSet<&str> = HashSet::from_iter(chain(a.keys(), b.keys()));
+
+//     for key in keys {
+//         let a = a.children_by_key(key)
+//     }
+
+//     todo!()
+// }
+
+pub fn set_operation_inplace<'a>(a: &'a mut Qube, b: &'a Qube, op: Op) -> &'a Qube { // NOTE(review): stub — `b` and `op` are currently ignored
+    a // returned unchanged; no set operation is applied
+}
--- a/src/rust/tree.rs
+++ b/src/rust/tree.rs
@ -1,82 +0,0 @@
-// use pyo3::prelude::*;
-// use pyo3::types::PyDict;
-// use rsfdb::listiterator::KeyValueLevel;
-// use serde_json::Value;
-
-// #[derive(Debug)]
-// pub struct TreeNode {
-//     pub key: KeyValueLevel,
-//     pub children: Vec<TreeNode>,
-// }
-
-// impl TreeNode {
-//     pub fn new(key: KeyValueLevel) -> Self {
-//         TreeNode {
-//             key,
-//             children: Vec::new(),
-//         }
-//     }
-
-//     pub fn insert(&mut self, path: &[KeyValueLevel]) {
-//         if path.is_empty() {
-//             return;
-//         }
-
-//         let kvl = &path[0];
-
-//         // Check if a child with the same key and value exists
-//         if let Some(child) = self.children.iter_mut().find(|child| child.key == *kvl) {
-//             // Insert the remaining path into the existing child
-//             child.insert(&path[1..]);
-//         } else {
-//             // Create a new child node
-//             let mut new_child = TreeNode::new(kvl.clone());
-//             new_child.insert(&path[1..]);
-//             self.children.push(new_child);
-//         }
-//     }
-
-//     pub fn traverse<F>(&self, level: usize, callback: &F)
-//     where
-//         F: Fn(&TreeNode, usize),
-//     {
-//         callback(self, level);
-//         for child in &self.children {
-//             child.traverse(level + 1, callback);
-//         }
-//     }
-
-//     pub fn to_json(&self) -> Value {
-//         let formatted_key = format!("{}={}", self.key.key, self.key.value);
-
-//         let children_json: Value = if self.children.is_empty() {
-//             Value::Object(serde_json::Map::new())
-//         } else {
-//             Value::Object(
-//                 self.children
-//                     .iter()
-//                     .map(|child| {
-//                         (
-//                             format!("{}={}", child.key.key, child.key.value),
-//                             child.to_json(),
-//                         )
-//                     })
-//                     .collect(),
-//             )
-//         };
-
-//         // Combine the formatted key with children
-//         serde_json::json!({ formatted_key: children_json })
-//     }
-
-//     pub fn to_py_dict(&self, py: Python) -> PyResult<PyObject> {
-//         let py_dict = PyDict::new(py);
-
-//         for child in &self.children {
-//             let child_key = format!("{}={}", child.key.key, child.key.value);
-//             py_dict.set_item(child_key, child.to_py_dict(py)?)?;
-//         }
-
-//         Ok(py_dict.to_object(py))
-//     }
-// }
--- a/stac_server/main.py
+++ b/stac_server/main.py
@ -1,17 +1,21 @@
 import json
 import os
 from collections import defaultdict
-from pathlib import Path
-from typing import Any, Dict

-import redis
+import requests
 import yaml
-from fastapi import FastAPI, Request
+from fastapi import Depends, FastAPI, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import FileResponse
-from tree_traverser import CompressedTree
+from fastapi.responses import FileResponse, HTMLResponse
+from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from frozendict import frozendict
+from qubed import Qube
+from qubed.tree_formatters import node_tree_to_html

 app = FastAPI()
+security = HTTPBearer()
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
@ -20,38 +24,97 @@ app.add_middleware(
    allow_headers=["*"],
 )

-@app.get('/favicon.ico', include_in_schema=False)
-async def favicon():
-    return FileResponse("favicon.ico")
+app.mount("/static", StaticFiles(directory="static"), name="static")
+templates = Jinja2Templates(directory="templates")

+qubes: dict[str, Qube] = {}
+# print("Getting climate and extremes dt data from github")
+# try:
+#     qubes["climate-dt"] = Qube.from_json(
+#         requests.get(
+#             "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json",
+#         timeout=3).json()
+#     )
+#     qubes["extremes-dt"] = Qube.from_json(
+#         requests.get(
+#             "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/extremes_dt.json",
+#         timeout=3).json()
+#     )
+#     mars_language = yaml.safe_load(
+#         requests.get(
+#             "https://github.com/ecmwf/qubed/raw/refs/heads/main/config/climate-dt/language.yaml",
+#         timeout=3).content
+#     )
+# except:
+qubes["climate-dt"] = Qube.empty()
+qubes["extremes-dt"] = Qube.empty()
+mars_language = {}

 if "LOCAL_CACHE" in os.environ:
-    print("Getting data from local file")
+    print("Getting climate and extremes dt data from local files")
+    with open("../tests/example_qubes/climate_dt.json") as f:
+        qubes["climate-dt"] = Qube.from_json(json.load(f))

-    base = Path(os.environ["LOCAL_CACHE"])
-    with open(base / "compressed_tree.json", "r") as f:
-        json_tree = f.read()
+    with open("../tests/example_qubes/extremes_dt.json") as f:
+        qubes["climate-dt"] = qubes["climate-dt"] | Qube.from_json(json.load(f))

+    with open("../tests/example_qubes/od.json") as f:
+        qubes["climate-dt"] = qubes["climate-dt"] | Qube.from_json(json.load(f))

-    with open(base / "language.yaml", "r") as f:
+    with open("../config/language/language.yaml", "r") as f:
        mars_language = yaml.safe_load(f)["_field"]

+    with open("../config/language/paramids.yaml", "r") as f:
+        params = yaml.safe_load(f)
 else:
-    print("Getting cache from redis")
-    r = redis.Redis(host="redis", port=6379, db=0)
-    json_tree = r.get('compressed_catalog')
-    assert json_tree, "No compressed tree found in redis"
-    mars_language = json.loads(r.get('mars_language'))
+    print("Getting climate and extremes dt data from github")
+    qubes["climate-dt"] = Qube.from_json(
+        requests.get(
+            "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json",
+            timeout=1,
+        ).json()
+    )
+    qubes["extremes-dt"] = Qube.from_json(
+        requests.get(
+            "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/extremes_dt.json",
+            timeout=1,
+        ).json()
+    )

-print("Loading tree from json")
-c_tree = CompressedTree.from_json(json.loads(json_tree))
+    qubes["od"] = Qube.from_json(
+        requests.get(
+            "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/od.json",
+            timeout=1,
+        ).json()
+    )
+    qubes["climate-dt"] = qubes["climate-dt"] | qubes["extremes-dt"] | qubes["od"]
+    mars_language = yaml.safe_load(
+        requests.get(
+            "https://github.com/ecmwf/qubed/raw/refs/heads/main/config/climate-dt/language.yaml",
+            timeout=3,
+        ).content
+    )["_field"]

-print("Partialy decompressing tree, shoud be able to skip this step in future.")
-tree = c_tree.reconstruct_compressed_ecmwf_style()
+if "API_KEY" in os.environ:
+    api_key = os.environ["API_KEY"]
+else:
+    with open("api_key.secret", "r") as f:
+        api_key = f.read()

 print("Ready to serve requests!")

-def request_to_dict(request: Request) -> Dict[str, Any]:
+
+def validate_key(key: str):
+    if key not in qubes:
+        raise HTTPException(status_code=404, detail=f"Qube {key} not found")
+    return key
+
+
+async def get_body_json(request: Request):
+    return await request.json()
+
+
+def parse_request(request: Request) -> dict[str, str | list[str]]:
    # Convert query parameters to dictionary format
    request_dict = dict(request.query_params)
    for key, value in request_dict.items():
@ -61,145 +124,219 @@ def request_to_dict(request: Request) -> Dict[str, Any]:

    return request_dict

-def match_against_cache(request, tree):
-    if not tree: return {"_END_" : {}}
-    matches = {}
-    for k, subtree in tree.items():
-        if len(k.split("=")) != 2:
-            raise ValueError(f"Key {k} is not in the correct format")
-        key, values = k.split("=")
-        values = set(values.split(","))
-        if key in request:
-            if isinstance(request[key], list):
-                matching_values = ",".join(request_value for request_value in request[key] if request_value in values)
-                if matching_values:
-                    matches[f"{key}={matching_values}"] = match_against_cache(request, subtree)
-            elif request[key] in values:
-                matches[f"{key}={request[key]}"] = match_against_cache(request, subtree)

-    if not matches: return {k : {} for k in tree.keys()}
-    return matches
+def validate_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
+    if credentials.credentials != api_key:
+        raise HTTPException(status_code=403, detail="Incorrect API Key")
+    return credentials


-def max_tree_depth(tree):
-    "Figure out the maximum depth of a tree"
-    if not tree:
-        return 0
-    return 1 + max(max_tree_depth(v) for v in tree.values())
-
-def prune_short_branches(tree, depth = None):
-    if depth is None: 
-        depth = max_tree_depth(tree)
-    return {k : prune_short_branches(v, depth-1) for k, v in tree.items() if max_tree_depth(v) == depth-1}
-        
-def get_paths_to_leaves(tree):
-    for k,v in tree.items():
-        if not v:
-            yield [k,]
-        else: 
-            for leaf in get_paths_to_leaves(v):
-                yield [k,] + leaf
-
-def get_leaves(tree):
-    for k,v in tree.items():
-        if not v:
-            yield k
-        else: 
-            for leaf in get_leaves(v):
-                yield leaf
-
-@app.get("/api/tree")
-async def get_tree(request: Request):
-    request_dict = request_to_dict(request)
-    print(c_tree.multi_match(request_dict))
-    return c_tree.multi_match(request_dict)
-
-@app.get("/api/match")
-async def get_match(request: Request):
-    # Convert query parameters to dictionary format
-    request_dict = request_to_dict(request)
-
-    # Run the schema matching logic
-    match_tree = match_against_cache(request_dict, tree)
+@app.get("/favicon.ico", include_in_schema=False)
+async def favicon():
+    return FileResponse("favicon.ico")


-    # Prune the tree to only include branches that are as deep as the deepest match
-    # This means if you don't choose a certain branch at some point
-    # the UI won't keep nagging you to choose a value for that branch
-    match_tree = prune_short_branches(match_tree)
-
-    return match_tree
-
-@app.get("/api/paths")
-async def api_paths(request: Request):
-    request_dict = request_to_dict(request)
-    match_tree = match_against_cache(request_dict, tree)
-    match_tree = prune_short_branches(match_tree)
-    paths = get_paths_to_leaves(match_tree)
+@app.get("/", response_class=HTMLResponse)
+async def read_root(request: Request):
+    return templates.TemplateResponse(
+        "index.html",
+        {
+            "request": request,
+            "config": {
+                "message": "Hello from the dev server!",
+            },
+            "api_url": os.environ.get("API_URL", "/api/v1/"),
+        },
+    )


-    # deduplicate leaves based on the key
+@app.get("/api/v1/keys/")
+async def keys():
+    return list(qubes.keys())
+
+
+@app.get("/api/v1/get/{key}/")
+async def get(
+    key: str = Depends(validate_key),
+    request: dict[str, str | list[str]] = Depends(parse_request),
+):
+    return qubes[key].to_json()
+
+
+@app.post("/api/v1/union/{key}/")
+async def union(
+    key: str,
+    credentials: HTTPAuthorizationCredentials = Depends(validate_api_key),
+    body_json=Depends(get_body_json),
+):
+    if key not in qubes:
+        qubes[key] = Qube.empty()
+
+    q = Qube.from_json(body_json)
+    qubes[key] = qubes[key] | q
+    return qubes[key].to_json()
+
+
+def follow_query(request: dict[str, str | list[str]], qube: Qube):
+    s = qube.select(request, mode="next_level", consume=False)
    by_path = defaultdict(lambda: {"paths": set(), "values": set()})
-    for p in paths:
-        if p[-1] == "_END_": continue
-        key, values = p[-1].split("=")
-        values = values.split(",")
-        path = tuple(p[:-1])

-        by_path[key]["values"].update(values)
-        by_path[key]["paths"].add(tuple(path))
+    for request, node in s.leaf_nodes():
+        if not node.metadata.get("is_leaf", True):
+            by_path[node.key]["values"].update(node.values.values)
+            by_path[node.key]["paths"].add(frozendict(request))

-    return [{
+    return s, [
+        {
            "paths": list(v["paths"]),
            "key": key,
            "values": sorted(v["values"], reverse=True),
-    } for key, v in by_path.items()]
+        }
+        for key, v in by_path.items()
+    ]

-@app.get("/api/stac")
-async def get_STAC(request: Request):
-    request_dict = request_to_dict(request)
-    paths = await api_paths(request)

+@app.get("/api/v1/select/{key}/")
+async def select(
+    key: str = Depends(validate_key),
+    request: dict[str, str | list[str]] = Depends(parse_request),
+):
+    q = qubes[key].select(request)
+    return q.to_json()
+
+
+@app.get("/api/v1/query/{key}")
+async def query(
+    key: str = Depends(validate_key),
+    request: dict[str, str | list[str]] = Depends(parse_request),
+):
+    qube, paths = follow_query(request, qubes[key])
+    return paths
+
+
+@app.get("/api/v1/basicstac/{key}/{filters:path}")
+async def basic_stac(filters: str, key: str = Depends(validate_key)):
+    pairs = filters.strip("/").split("/")
+    request = dict(p.split("=") for p in pairs if "=" in p)
+
+    qube, _ = follow_query(request, qubes[key])
+
+    def make_link(child_request):
+        """Take a MARS Key and information about which paths matched up to this point and use it to make a STAC Link"""
+        kvs = [f"{key}={value}" for key, value in child_request.items()]
+        href = f"/api/v1/basicstac/{key}/{'/'.join(kvs)}"
+        last_key, last_value = list(child_request.items())[-1]
+
+        return {
+            "title": f"{last_key}={last_value}",
+            "href": href,
+            "rel": "child",
+            "type": "application/json",
+        }
+
+    # Format the response as a STAC collection
+    (this_key, this_value), *_ = (
+        list(request.items())[-1] if request else ("root", "root"),
+        None,
+    )
+    key_info = mars_language.get(this_key, {})
+    try:
+        values_info = dict(key_info.get("values", {}))
+        value_info = values_info.get(
+            this_value, f"No info found for value `{this_value}` found."
+        )
+    except ValueError:
+        value_info = f"No info found for value `{this_value}` found."
+
+    if this_key == "root":
+        value_info = "The root node"
+    # key_desc = key_info.get(
+    #     "description", f"No description for `key` {this_key} found."
+    # )
+    print(this_key, this_value)
+
+    print(this_key, key_info)
+    stac_collection = {
+        "type": "Catalog",
+        "stac_version": "1.0.0",
+        "id": "root"
+        if not request
+        else "/".join(f"{k}={v}" for k, v in request.items()),
+        "title": f"{this_key}={this_value}",
+        "description": value_info,
+        "links": [make_link(leaf) for leaf in qube.leaves()],
+        # "debug": {
+        #     "qube": str(qube),
+        # },
+    }
+
+    return stac_collection
+
+
+@app.get("/api/v1/stac/{key}/")
+async def get_STAC(
+    key: str = Depends(validate_key),
+    request: dict[str, str | list[str]] = Depends(parse_request),
+):
+    qube, paths = follow_query(request, qubes[key])
+    kvs = [
+        f"{k}={','.join(v)}" if isinstance(v, list) else f"{k}={v}"
+        for k, v in request.items()
+    ]
+    request_params = "&".join(kvs)

    def make_link(key_name, paths, values):
        """Take a MARS Key and information about which paths matched up to this point and use it to make a STAC Link"""
-        path = paths[0]
-        href_template = f"/stac?{'&'.join(path)}{'&' if path else ''}{key_name}={{}}"
-        optional = [False]
-        optional_str = "Yes" if all(optional) and len(optional) > 0 else ("Sometimes" if any(optional) else "No")
-        values_from_mars_language = mars_language.get(key_name, {}).get("values", [])
+        href_template = f"/stac?{request_params}{'&' if request_params else ''}{key_name}={{{key_name}}}"

-        # values = [v[0] if isinstance(v, list) else v for v in values_from_mars_language]
+        print(f"{key_name = }")
+        if key_name == "param":
+            print(params)
+            values_from_mars_language = params
+            value_descriptions = [
+                max(params.get(int(v), [""]), key=len) for v in values
+            ]
+            print(value_descriptions)
+        else:
+            values_from_mars_language = mars_language.get(key_name, {}).get(
+                "values", []
+            )

            if all(isinstance(v, list) for v in values_from_mars_language):
-            value_descriptions_dict = {k : v[-1]
+                value_descriptions_dict = {
+                    k: v[-1]
                    for v in values_from_mars_language
                    if len(v) > 1
-                                       for k in v[:-1]}
-            value_descriptions = [value_descriptions_dict.get(v, "") for v in values]
-            if not any(value_descriptions): value_descriptions = None
+                    for k in v[:-1]
+                }
+                value_descriptions = [
+                    value_descriptions_dict.get(v, "") for v in values
+                ]
+                if not any(value_descriptions):
+                    value_descriptions = None

        return {
            "title": key_name,
-                "generalized_datacube:href_template": href_template,
+            "uriTemplate": href_template,
            "rel": "child",
            "type": "application/json",
-                "generalized_datacube:dimension" : {
-                    "type" : mars_language.get(key_name, {}).get("type", ""),
-                    "description": mars_language.get(key_name, {}).get("description", ""),
-                    "values" : values,
+            "variables": {
+                key_name: {
+                    "type": "string",
+                    "description": mars_language.get(key_name, {}).get(
+                        "description", ""
+                    ),
+                    "enum": values,
                    "value_descriptions": value_descriptions,
-                    "optional" : any(optional),
-                    "multiple": True,
-                    "paths" : paths,
+                    # "paths": paths,
                }
-
+            },
        }

-
    def value_descriptions(key, values):
        return {
-            v[0] : v[-1] for v in mars_language.get(key, {}).get("values", [])
+            v[0]: v[-1]
+            for v in mars_language.get(key, {}).get("values", [])
            if len(v) > 1 and v[0] in list(values)
        }

@ -210,24 +347,30 @@ async def get_STAC(request: Request):
            "description": mars_language.get(key, {}).get("description", ""),
            "value_descriptions": value_descriptions(key, values),
        }
-        for key, values in request_dict.items()
+        for key, values in request.items()
    }

    # Format the response as a STAC collection
    stac_collection = {
-        "type": "Collection",
+        "type": "Catalog",
        "stac_version": "1.0.0",
-        "id": "partial-matches",
+        "id": "root" if not request else "/stac?" + request_params,
        "description": "STAC collection representing potential children of this request",
-        "links": [
-            make_link(p["key"], p["paths"], p["values"])
-            for p in paths
-        ],
+        "links": [make_link(p["key"], p["paths"], p["values"]) for p in paths],
        "debug": {
-            "request": request_dict,
+            # "request": request,
            "descriptions": descriptions,
-            "paths" : paths,
-        }
+            # "paths": paths,
+            "qube": node_tree_to_html(
+                qube.compress(),
+                collapse=True,
+                depth=10,
+                include_css=False,
+                include_js=False,
+                max_summary_length=200,
+                css_id="qube",
+            ),
+        },
    }

    return stac_collection
--- a/stac_server/requirements.txt
+++ b/stac_server/requirements.txt
@ -1,3 +1,5 @@
 fastapi[standard]
 pe
 redis
+frozendict
+requests
--- a/stac_server/run.sh
+++ b/stac_server/run.sh
@ -1,3 +1,3 @@
 parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
 cd "$parent_path"
-LOCAL_CACHE=../config/climate-dt fastapi dev ./main.py --port 8124 --reload
+LOCAL_CACHE=True fastapi dev ./main.py --port 8124 --reload
--- a/stac_server/run_prod.sh
+++ b/stac_server/run_prod.sh
@ -0,0 +1,3 @@
+parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
+cd "$parent_path"
+sudo LOCAL_CACHE=True ../../.venv/bin/fastapi dev ./main.py --port 80 --host=0.0.0.0 --reload
--- a/web_query_builder/static/app.js
+++ b/web_query_builder/static/app.js
@ -138,39 +138,23 @@ async function createCatalogItem(link, itemsContainer) {
    // Update the item div with real content
    itemDiv.classList.remove("loading");

-    const dimension = link["generalized_datacube:dimension"];
+    const variables = link["variables"];
+    const key = Object.keys(variables)[0];
+    const variable = variables[key];

    // add data-key attribute to the itemDiv
    itemDiv.dataset.key = link.title;
-    itemDiv.dataset.keyType = dimension.type;
+    itemDiv.dataset.keyType = variable.type;

    itemDiv.innerHTML = `
      <h3 class="item-title">${link.title || "No title available"}</h3>
      <p class="item-type">Key Type: ${itemDiv.dataset.keyType || "Unknown"}</p>
-      <!-- <p class="item-type">Paths: ${dimension.paths}</p> -->
-      <p class="item-type">Optional: ${dimension.optional ? "Yes" : "No"}</p>
      <p class="item-description">${
-        dimension.description
-          ? dimension.description.slice(0, 100)
-          : "No description available"
-      }...</p>
+        variable.description ? variable.description.slice(0, 100) : ""
+      }</p>
    `;

-    // if (dimension.type === "date" || dimension.type === "time") {
-    //   // Render a date picker for the "date" key
-    //   const picker = `<input type="${link.title}" name="${link.title}">`;
-    //   //convert picker to HTML node
-    //   const pickerNode = document
-    //     .createRange()
-    //     .createContextualFragment(picker);
-    //   itemDiv.appendChild(pickerNode);
-    // }
-    // Otherwise create a scrollable list with checkboxes for values if available
-    if (
-      //   dimension.type === "enum" &&
-      dimension.values &&
-      dimension.values.length > 0
-    ) {
+    if (variable.enum && variable.enum.length > 0) {
      const listContainer = renderCheckboxList(link);
      itemDiv.appendChild(listContainer);
    } else {
@ -185,14 +169,15 @@ async function createCatalogItem(link, itemsContainer) {
 }

 function renderCheckboxList(link) {
-  const dimension = link["generalized_datacube:dimension"];
-  const value_descriptions = dimension.value_descriptions || [];
+  const variables = link["variables"];
+  const key = Object.keys(variables)[0];
+  const variable = variables[key];
+  const value_descriptions = variable.value_descriptions || [];

  const listContainerHTML = `
      <div class="item-list-container">
-        <label class="list-label">Select one or more values:</label>
        <div class="scrollable-list">
-          ${dimension.values
+          ${variable.enum
            .map((value, index) => {
              const labelText = value_descriptions[index]
                ? `${value} - ${value_descriptions[index]}`
@ -201,7 +186,7 @@ function renderCheckboxList(link) {
                <div class="checkbox-container">
                  <label class="checkbox-label">
                  <input type="checkbox" class="item-checkbox" value="${value}" ${
-                dimension.values.length === 1 ? "checked" : ""
+                variable.enum.length === 1 ? "checked" : ""
              }>
                  ${labelText}
                  </label>
@ -268,8 +253,10 @@ function renderRawSTACResponse(catalog) {
  itemDetails.textContent = JSON.stringify(just_stac, null, 2);

  const debug_container = document.getElementById("debug");
-  // create new object without debug key
  debug_container.textContent = JSON.stringify(catalog.debug, null, 2);
+
+  const qube_container = document.getElementById("qube");
+  qube_container.innerHTML = catalog.debug.qube;
 }

 // Fetch STAC catalog and display items
@ -293,6 +280,7 @@ async function fetchCatalog(request, stacUrl) {
    // Highlight the request and raw STAC
    hljs.highlightElement(document.getElementById("raw-stac"));
    hljs.highlightElement(document.getElementById("debug"));
+    hljs.highlightElement(document.getElementById("example-python"));
  } catch (error) {
    console.error("Error fetching STAC catalog:", error);
  }
--- a/stac_server/static/qube_styles.css
+++ b/stac_server/static/qube_styles.css
@ -0,0 +1,50 @@
+pre#qube {
+    font-family: monospace;
+    white-space: pre;
+    font-family: SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;
+    font-size: 12px;
+    line-height: 1.4;
+
+    details {
+        margin-left: 0;
+    }
+
+    .qubed-level a {
+        margin-left: 10px;
+        text-decoration: none;
+    }
+
+    summary {
+        list-style: none;
+        cursor: pointer;
+        text-overflow: ellipsis;
+        overflow: hidden;
+        text-wrap: nowrap;
+        display: block;
+    }
+
+    span.qubed-node:hover {
+        background-color: #f0f0f0;
+    }
+
+    details > summary::after {
+        content: ' ▲';
+    }
+
+    details:not([open]) > summary::after {
+        content: " ▼";
+    }
+
+    .qubed-level {
+        text-overflow: ellipsis;
+        overflow: hidden;
+        text-wrap: nowrap;
+        display: block;
+    }
+
+    summary::-webkit-details-marker {
+      display: none;
+      content: "";
+    }
+
+}
--- a/web_query_builder/static/styles.css
+++ b/web_query_builder/static/styles.css
@ -2,6 +2,9 @@ html,
 body {
    min-height: 100vh;
    height: 100%;
+
+    --accent-color: #003399;
+    --background-grey: #f4f4f4;
 }

 body {
@ -23,7 +26,7 @@ body {
    width: 30%;
    padding: 10px;
    overflow-y: scroll;
-    background-color: #f4f4f4;
+    background-color: var(--background-grey);
    border-right: 1px solid #ddd;
 }

@ -45,7 +48,9 @@ body {
 }

 .sidebar-header button {
-    width: 10em;
+    width: 7em;
+    height: 2em;
+    padding: 0;
 }

 canvas {
@ -63,6 +68,7 @@ canvas {
    margin-bottom: 10px;
    border-radius: 5px;
    transition: background-color 0.2s ease;
+    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
 }

 .item-title {
@ -91,10 +97,8 @@ canvas {
 }

 .item.selected {
-    background-color: #d4e9ff;
-    /* Lighter blue for selection */
-    border-color: #003399;
-    /* Keep the original ECMWF blue for the border */
+    background-color: var(--background-grey);
+    border-color: var(--accent-color);
 }

 summary h2 {
@ -117,7 +121,7 @@ button {
    /* Padding around button text */
    margin: 0 5px;
    /* Margin between buttons */
-    background-color: #003399;
+    background-color: var(--accent-color);
    /* ECMWF blue */
    color: white;
    /* White text color */
@ -138,7 +142,6 @@ button:hover {

 .item-list-container {
    margin-top: 20px;
-    margin-bottom: 20px;
 }

 .scrollable-list {
@ -148,7 +151,6 @@ button:hover {
    border: 1px solid #ccc;
    border-radius: 4px;
    background-color: #fff;
-    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
 }

 .checkbox-container {
@ -168,14 +170,14 @@ button:hover {
 }

 .checkbox-container:hover .checkbox-label {
-    color: #003399;
+    color: var(--accent-color);
 }

 .list-label {
    font-weight: bold;
    margin-bottom: 0.5em;
    display: block;
-    color: #003399;
+    color: var(--accent-color);
 }

 span.key,
@ -209,3 +211,7 @@ span.value:hover {
        width: 100%;
    }
 }
+
+details h2 {
+    font-size: medium;
+}
--- a/web_query_builder/templates/index.html
+++ b/web_query_builder/templates/index.html
@ -5,6 +5,7 @@
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>ECMWF DestinE STAC Viewer</title>
    <link rel="stylesheet" href="/static/styles.css" />
+    <link rel="stylesheet" href="/static/qube_styles.css" />
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github.min.css">
    <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/json.min.js"></script>
@ -38,8 +39,25 @@
 }
            </code></pre>

+            <!-- Container to show the current tree -->
+            <h2>Currently Selected Tree</h2>
+            <p>This shows the data <a href="https://qubed.readthedocs.io/en/latest/quickstart.html">qube</a> that matches with the current query. The leaves are the next set of available selections you can make. </p>
+            <pre id = "qube"></pre>
+
+            <details>
+                <summary><h2>Example Qubed Code</h2></summary>
+                See the <a href="https://qubed.readthedocs.io/en/latest/">Qubed documentation</a> for more details.
+                <pre><code id="example-python" class="language-python">
+# pip install qubed requests
+import requests
+from qubed import Qube
+qube = Qube.from_json(requests.get("{{ api_url }}select/climate-dt/?{{request.url.query}}").json())
+qube.print()
+                </code></pre>
+            </details>
+
            <!-- Container fo the raw STAC response -->
-            <details open>
+            <details>
                <summary><h2>Raw STAC Response</h2></summary>
                <p>See the <a href="https://github.com/ecmwf-projects/catalogs/blob/main/structured_stac.md">STAC Extension Proposal</a> for more details on the format.</p>
                <pre class="json-pre"><code id="raw-stac" class="language-json"></code></pre>
@ -54,7 +72,7 @@
    </div>

    <script>
-        window.API_URL = "{{ api_url }}";
+        window.API_URL = "{{ api_url }}stac/climate-dt/";
    </script>
    <script src="/static/app.js"></script>
 </body>
--- a/structured_stac.md
+++ b/structured_stac.md
@ -7,18 +7,39 @@
 - **Extension [Maturity Classification](https://github.com/radiantearth/stac-spec/tree/master/extensions/README.md#extension-maturity):** Proposal
 - **Owner**: @TomHodson

-This STAC extension allows for represention of [generalised datacubes][gen_datacubes]. 
+This STAC extension borrows the [Draft OGC Records API](https://docs.ogc.org/DRAFTS/20-004.html), specifically the [templated links section](https://docs.ogc.org/DRAFTS/20-004.html#sc_templated_links_with_variables) to give STAC the ability to index very large datasets that conform to a generalised datacube model.

-A datacube has a fixed set of dimensions `[a, b, c..]` , each of which have a fixed span `{a: ["temp","rainfall"], b : [1-7], c:[True, False]}` such that we can access data by indexing, i.e providing a value for each axis, `a="rainfall", b=1, ...`.  A generalised datacubes allow the dimensions to change during indexing, so choosing `a="rainfall"` might yield a different set of axes from `a="temp"`.
+A typical datacube has a fixed set of dimensions `[a, b, c..]`, each of which has a fixed span `{a: ["temp","rainfall"], b : [1-7], c:[True, False]}` such that we can access data by indexing, i.e. providing a value for each axis, `a="rainfall", b=1, ...`. A generalized datacube, by our definition, allows the dimensions to change during indexing, so choosing `a="rainfall"` might yield a different set of axes from `a="temp"`.

-The [STAC Datacube][datacube_extension] extension serves the needs of datacubes that appear in STAC as Items or Collections, i.e as leaves in the tree. This extension instead focussing on allowing STAC to serve as an interface to dynamically explore the branches of generalised datacubes. It does this by adding additional metadata to the children of Catalog entries.
+The [STAC Datacube][datacube_extension] extension serves the needs of datacubes that appear in STAC as Items or Collections, i.e. as leaves in the tree. This extension instead focuses on allowing STAC to serve as an interface to dynamically explore the branches of generalised datacubes. It does this by adding additional metadata from the OGC Records standard to the children of Catalog entries.

-We take the *Dimension Objects* defined by the [Datacube Extension][datacube_extension] and add them to [Link objects][link_objects] under the key `generalized_datacube:dimension`. This enables a single Link Object to represent a whole axis and its allowed values. Since `href` must now be constructed dynamically, we rempve it and add a `generalized_datacube:href_template` attribute to communicate how to construct the URLs corresponding to particular choice of value or values.
+In practice, what this proposal does is:

-In order to support more complex slicing operations in which multiple indices may be selected for a given dimensions we also add additional optional attributes to all *Dimension Objects*, these are:
+1. For child items that represent many distinct children, replace `"links":` with `"linkTemplates":` in the Catalog entry. (Following the example of the OGC Records API.)
+2. For each `rel: Child` object in `linkTemplates`:

-* `optional` : bool whether this dimension can be skipped.
-* `multiple` : boo wether multiple values can be selected for this key.
+    a. Add a `variables` key following the OGC Records API whose value is a dictionary with entries like:
+
+    ```json
+    "format": {
+            "type": "string",
+            "enum": [
+                "application/vnd.google-earth.kml+xml",
+                "application/vnd.google-earth.kmz",
+                "image/png",
+                "image/jpeg",
+                "image/gif",
+                "image/png; mode=8bit",
+                "application/x-pdf",
+                "image/svg+xml",
+                "image/tiff"
+            ]
+            }
+    ```
+
+    b. Add a "uriTemplate" key that specifies how to construct the resulting URL, e.g. `http://hostname.tld/app/index.html?class=od&format={format}`
+
+This enables a child object to represent a whole axis and its allowed values. Since `href` must now be constructed dynamically, we remove it and add a `uriTemplate` attribute to communicate how to construct the URLs corresponding to a particular choice of value or values.

 [gen_datacubes]: https://github.com/ecmwf/datacube-spec
 [link_objects]: https://github.com/radiantearth/stac-spec/blob/master/commons/links.md#link-object
@ -34,19 +55,20 @@ A typical `Catalog` entry with this extension:
  "id": "rainfall",
  "stac_version": "1.0.0",
  "description": "ECMWF's Operational Data Archive",
-  "links": [
+  "linkTemplates": [
    {
-      "title": "Expver - Experiment Version",
-      "generalized_datacube:href_template": "http://136.156.129.226/app/index.html?class=od&expver={}",
      "rel": "child",
+      "title": "Expver - Experiment Version",
+      "uriTemplate": "http://hostname.tld/app/index.html?class=od&expver={expver}",
      "type": "application/json",
-      "generalized_datacube:dimension" : {
-        "type" : "enum",
+      "variables" : {
+        "expver" : {
            "description": "Experiment version, 0001 selects operational data.",
-        "values" : ["0001", "xxxx"],
+            "type" : "string",
+            "enum" : ["0001", "xxxx"],
            "value_descriptions" : ["Operational Data", "Experimental Data"],
            "optional" : false,
-        "multiple": true,
+        }
      }
      ""

@ -71,119 +93,18 @@ The fields in the table below can be used in these parts of STAC documents:
 - [x] Links

 | Field Name           | Type                      | Description                                                                                                           |
-| -------------------- | ------------------------- | -------------------------------------------------------- |
-| axis:dimension       | Dimension Object          | Whether the axis is an enum, date range, time range etc  |
-| axis:href_template   | string                    | Whether the axis is an enum, date range, time range etc  |
+| -------------------- | ------------------------- | --------------------------------------------------------------------------------------------------------------------- |
+| uriTemplate          | URI Template              | Of the form "http://hostname.tld/app/index.html?class=od&expver={expver}", follows OGC Records Spec for uriTemplates  |
+| variables            |                           |                                      |




 ### Additional Field Information

-#### axis:dimension
+#### uriTemplate
+Todo


-
-### Dimension Object
-
-The dimension object reuses all those [defined by the datacube extension](https://github.com/stac-extensions/datacube#dimension-object), currently those are Horizontal Spatial Raster, Vertical Spatial, Temporal Dimension, Spatial Vector Dimension, Additional Dimension. They are reproduced below for reference.
-
-These dimension objects are defined in addition:
-
-### Enum Dimension Object
-
-
-| Field Name       | Type              | Description                                                  |
-| ---------------- | ----------------- | ------------------------------------------------------------ |
-| type             | string            | **REQUIRED.**  `enum`.                                       |
-| description      | string            | Detailed multi-line description to explain the dimension. [CommonMark 0.29](http://commonmark.org/) syntax MAY be used for rich text representation. |
-| values           | \[number\|string] | An ordered list of all values, especially useful for [nominal](https://en.wikipedia.org/wiki/Level_of_measurement#Nominal_level) values. |
-| value_descriptions           | \[string] | Optionally provide a human readable description for each value.  Useful if the values are codes that have defined meanings. |
-| step             | number\|null      | If the dimension consists of [interval](https://en.wikipedia.org/wiki/Level_of_measurement#Interval_scale) values, the space between the values. Use `null` for irregularly spaced steps. |
-| unit             | string            | The unit of measurement for the data, preferably compliant to [UDUNITS-2](https://ncics.org/portfolio/other-resources/udunits2/) units (singular). |
-| reference_system | string            | The reference system for the data.                           |
-
-An Enum Dimension Object MUST specify `values`.
-
-Dimension objects degined by the datacube extension:
-
-### Horizontal Spatial Raster Dimension Object
-
-A spatial raster dimension in one of the horizontal (x or y) directions.
-
-| Field Name       | Type           | Description                                                  |
-| ---------------- | -------------- | ------------------------------------------------------------ |
-| type             | string         | **REQUIRED.** Type of the dimension, always `spatial`.       |
-| axis             | string         | **REQUIRED.** Axis of the spatial raster dimension (`x`, `y`).      |
-| description      | string         | Detailed multi-line description to explain the dimension. [CommonMark 0.29](http://commonmark.org/) syntax MAY be used for rich text representation. |
-| extent           | \[number]      | **REQUIRED.** Extent (lower and upper bounds) of the dimension as two-element array. Open intervals with `null` are not allowed. |
-| values           | \[number]      | Optionally, an ordered list of all values.                   |
-| step             | number\|null   | The space between the values. Use `null` for irregularly spaced steps. |
-| reference_system | string\|number\|object | The spatial reference system for the data, specified as [numerical EPSG code](http://www.epsg-registry.org/), [WKT2 (ISO 19162) string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html) or [PROJJSON object](https://proj.org/specifications/projjson.html). Defaults to EPSG code 4326. |
-
-### Vertical Spatial Dimension Object
-
-A spatial dimension in vertical (z) direction.
-
-| Field Name       | Type             | Description                                                  |
-| ---------------- | ---------------- | ------------------------------------------------------------ |
-| type             | string           | **REQUIRED.** Type of the dimension, always `spatial`.       |
-| axis             | string           | **REQUIRED.** Axis of the spatial dimension, always `z`.     |
-| description      | string           | Detailed multi-line description to explain the dimension. [CommonMark 0.29](http://commonmark.org/) syntax MAY be used for rich text representation. |
-| extent           | \[number\|null\]   | If the dimension consists of [ordinal](https://en.wikipedia.org/wiki/Level_of_measurement#Ordinal_scale) values, the extent (lower and upper bounds) of the values as two-element array. Use `null` for open intervals. |
-| values           | \[number\|string\] | An ordered list of all values, especially useful for [nominal](https://en.wikipedia.org/wiki/Level_of_measurement#Nominal_level) values. |
-| step             | number\|null     | If the dimension consists of [interval](https://en.wikipedia.org/wiki/Level_of_measurement#Interval_scale) values, the space between the values. Use `null` for irregularly spaced steps. |
-| unit             | string           | The unit of measurement for the data, preferably compliant to [UDUNITS-2](https://ncics.org/portfolio/other-resources/udunits2/) units (singular). |
-| reference_system | string\|number\|object | The spatial reference system for the data, specified as [numerical EPSG code](http://www.epsg-registry.org/), [WKT2 (ISO 19162) string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html) or [PROJJSON object](https://proj.org/specifications/projjson.html). Defaults to EPSG code 4326. |
-
-A Vertical Spatial Dimension Object MUST specify an `extent` or `values`. It MAY specify both. 
-
-### Temporal Dimension Object
-
-A temporal dimension based on the ISO 8601 standard. The temporal reference system for the data is expected to be ISO 8601 compliant 
-(Gregorian calendar / UTC). Data not compliant with ISO 8601 can be represented as an *Additional Dimension Object* with `type` set to `temporal`.
-
-| Field Name | Type            | Description                                                  |
-| ---------- | --------------- | ------------------------------------------------------------ |
-| type       | string          | **REQUIRED.** Type of the dimension, always `temporal`.      |
-| description | string         | Detailed multi-line description to explain the dimension. [CommonMark 0.29](http://commonmark.org/) syntax MAY be used for rich text representation. |
-| extent     | \[string\|null] | **REQUIRED.** Extent (lower and upper bounds) of the dimension as two-element array. The dates and/or times must be strings compliant to [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). `null` is allowed for open date ranges. |
-| values     | \[string]       | If the dimension consists of an ordered list of specific values they can be listed here. The dates and/or times must be strings compliant to [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). |
-| step       | string\|null    | The space between the temporal instances as [ISO 8601 duration](https://en.wikipedia.org/wiki/ISO_8601#Durations), e.g. `P1D`. Use `null` for irregularly spaced steps. |
-
-### Spatial Vector Dimension Object
-
-A vector dimension that defines a spatial dimension based on geometries.
-
-| Field Name       | Type           | Description                                                  |
-| ---------------- | -------------- | ------------------------------------------------------------ |
-| type             | string         | **REQUIRED.** Type of the dimension, always `geometry`.    |
-| axes             | \[string]      | Axes of the vector dimension as an ordered set of `x`, `y` and `z`. Defaults to `x` and `y`. |
-| description      | string         | Detailed multi-line description to explain the dimension. [CommonMark 0.29](http://commonmark.org/) syntax MAY be used for rich text representation. |
-| bbox             | \[number]      | **REQUIRED.** A single bounding box of the geometries as defined for [STAC Collections](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#spatial-extent-object), but not nested. |
-| values           | \[string\]     | Optionally, a representation of the geometries. This could be a list of WKT strings or other identifiers. |
-| geometry_types   | \[[GeoJSON Types](https://www.rfc-editor.org/rfc/rfc7946#section-1.4)] | A set of geometry types. If not present, mixed geometry types must be assumed. |
-| reference_system | string\|number\|object | The spatial reference system for the data, specified as [numerical EPSG code](http://www.epsg-registry.org/), [WKT2 (ISO 19162) string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html) or [PROJJSON object](https://proj.org/specifications/projjson.html). Defaults to EPSG code 4326. |
-
-For a general explanation what a vector datacube and a vector dimension is, please read the article "[Vector Data Cubes](https://r-spatial.org/r/2022/09/12/vdc.html)".
-
-### Additional Dimension Object
-
-An additional dimension that is not `spatial`, but may be `temporal` if the data is not compliant with ISO 8601 (see below).
-
-| Field Name       | Type              | Description                                                  |
-| ---------------- | ----------------- | ------------------------------------------------------------ |
-| type             | string            | **REQUIRED.** Custom type of the dimension, never `spatial` or `geometry`. |
-| description      | string            | Detailed multi-line description to explain the dimension. [CommonMark 0.29](http://commonmark.org/) syntax MAY be used for rich text representation. |
-| extent           | \[number\|null]   | If the dimension consists of [ordinal](https://en.wikipedia.org/wiki/Level_of_measurement#Ordinal_scale) values, the extent (lower and upper bounds) of the values as two-element array. Use `null` for open intervals. |
-| values           | \[number\|string] | An ordered list of all values, especially useful for [nominal](https://en.wikipedia.org/wiki/Level_of_measurement#Nominal_level) values. |
-| step             | number\|null      | If the dimension consists of [interval](https://en.wikipedia.org/wiki/Level_of_measurement#Interval_scale) values, the space between the values. Use `null` for irregularly spaced steps. |
-| unit             | string            | The unit of measurement for the data, preferably compliant to [UDUNITS-2](https://ncics.org/portfolio/other-resources/udunits2/) units (singular). |
-| reference_system | string            | The reference system for the data.                           |
-
-An Additional Dimension Object MUST specify an `extent` or `values`. It MAY specify both.
-
-Note on "Additional Dimension" with type `temporal`:
-You can distinguish the "Temporal Dimension" from an "Additional Dimension" by checking whether the extent exists and contains strings.
-So if the `type` equals `temporal` and `extent` is an array of strings/null, then you have a "Temporal Dimension",
-otherwise you have an "Additional Dimension".
+#### variables
+Todo
--- a/test_scripts/reconstruct.py
+++ b/test_scripts/reconstruct.py
@ -10,7 +10,10 @@ class CompressedTreeFixed(CompressedTree):
        c.cache = {}
        ca = data["cache"]
        for k, v in ca.items():
-            g = {k2 : ca[str(v2)]["dict"][k2] if k2 in ca[str(v2)]["dict"] else v2 for k2, v2 in v["dict"].items()}
+            g = {
+                k2: ca[str(v2)]["dict"][k2] if k2 in ca[str(v2)]["dict"] else v2
+                for k2, v2 in v["dict"].items()
+            }
            c.cache[int(k)] = RefcountedDict(g)
            c.cache[int(k)].refcount = v["refcount"]

@ -20,10 +23,15 @@ class CompressedTreeFixed(CompressedTree):

    def reconstruct(self, max_depth=None) -> dict[str, dict]:
        "Reconstruct the tree as a normal nested dictionary"
+
        def reconstruct_node(h: int, depth: int) -> dict[str, dict]:
            if max_depth is not None and depth > max_depth:
                return {}
-            return {k : reconstruct_node(v, depth=depth+1) for k, v in self.cache[h].items()}
+            return {
+                k: reconstruct_node(v, depth=depth + 1)
+                for k, v in self.cache[h].items()
+            }
+
        return reconstruct_node(self.root_hash, depth=0)


@ -39,5 +47,6 @@ output_data_path = Path("data/compressed_tree_climate_dt_ecmwf_style.json")

 compressed_tree.save(output_data_path)

-print(f"climate dt compressed tree ecmwf style: {output_data_path.stat().st_size // 1e6:.1f} MB")
-
+# True division: floor division (`//`) would throw away the fractional part
+# that the `.1f` format is meant to display (1.9 MB would print as "1.0 MB").
+print(
+    f"climate dt compressed tree ecmwf style: {output_data_path.stat().st_size / 1e6:.1f} MB"
+)
--- a/test_scripts/rust.py
+++ b/test_scripts/rust.py
@ -0,0 +1,81 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Sequence
+
+from qubed.rust import Qube as rsQube
+
+# q = pyQube.from_tree("""
+# root, class=d1
+# ├── dataset=another-value, generation=1/2/3
+# └── dataset=climate-dt/weather-dt, generation=1/2/3/4
+# """)
+# json_str = json.dumps(q.to_json())
+# rust_qube = Qube.from_json(json_str)
+# # print(repr(rust_qube))
+
+# # print(json_str)
+
+# expected = """root, class=d1
+# ├── dataset=another-value, generation=1/2/3
+# └── dataset=climate-dt/weather-dt, generation=1/2/3/4
+# """
+# assert repr(rust_qube) == expected
+# # print(rs_qube._repr_html_())
+
+# print(q | q)
+
+value = str | int | float | datetime
+
+
+class Qube(rsQube):
+    """Convenience constructors layered on top of the Rust-backed `rsQube`."""
+
+    @classmethod
+    def empty(cls):
+        """Create an instance with the default constructor and return it."""
+        q = cls()
+        # Debug trace kept from the original scratch script.
+        print(f"empty called {cls = } {q = }")
+        return q
+
+    @classmethod
+    def from_datacube(cls, datacube: dict[str, value | Sequence[value]]) -> Qube:
+        """Build a qube with one chain of nodes, one node per datacube key.
+
+        Keys are added in the dict's iteration order, each node being attached
+        to the node created for the previous key (the first one to the root).
+        An empty `datacube` yields an empty qube instead of raising.
+        """
+        qube = cls.empty()
+        # Starting from the root and walking every item handles the empty and
+        # single-key cases without special unpacking.
+        node = qube.root
+        for key, values in datacube.items():
+            node = qube.add_node(parent=node, key=key, values=values)
+
+        return qube
+
+    @classmethod
+    def from_dict(cls, d: dict) -> Qube:
+        """Build a qube from nested dicts whose keys look like `key=v1/v2/...`.
+
+        Each dict key becomes a node under its parent; the mapped dict holds
+        that node's children (an empty dict ends the branch).
+        """
+        q = cls.empty()
+
+        def add_children(parent, children: dict):
+            for k, grandchildren in children.items():
+                # Split on the first "=" only so values may contain "=".
+                key, values = k.split("=", 1)
+                node = q.add_node(
+                    parent=parent,
+                    key=key,
+                    values=values.split("/"),
+                )
+                add_children(node, grandchildren)
+
+        add_children(q.root, d)
+        return q
+
+
+# Exercise from_datacube: list values for "a" and "c", a bare string for "b".
+q = Qube.from_datacube({"a": ["4"], "b": "test", "c": ["1", "2", "3"]})
+
+print(q)
+print(repr(q))
+
+# Exercise from_dict with two sibling branches, each with one child level.
+q = Qube.from_dict(
+    {
+        "a=2/3": {"b=1": {}},
+        "a2=a/b": {"b2=1/2": {}},
+    }
+)
+
+print(q)
+print(repr(q))
--- a/test_scripts/test.py
+++ b/test_scripts/test.py
@ -5,15 +5,15 @@ from tqdm import tqdm
 from pathlib import Path
 import json
 from more_itertools import chunked
+
 process = psutil.Process()

+
 def massage_request(r):
-    return {k : v if isinstance(v, list) else [v]
-            for k, v in r.items()}
+    return {k: v if isinstance(v, list) else [v] for k, v in r.items()}


 if __name__ == "__main__":
-
    config = """
 ---
 type: remote
@ -46,7 +46,9 @@ store: remote

    today = datetime.datetime.today()
    start = datetime.datetime.strptime("19920420", "%Y%m%d")
-    date_list = [start + datetime.timedelta(days=x) for x in range((today - start).days)]
+    date_list = [
+        start + datetime.timedelta(days=x) for x in range((today - start).days)
+    ]
    date_list = [d.strftime("%Y%m%d") for d in date_list if d not in visited_dates]
    for dates in chunked(tqdm(date_list), 5):
        print(dates[0])
--- a/test_scripts/update_dts.py
+++ b/test_scripts/update_dts.py
@ -0,0 +1,99 @@
+# Example script for ingesting data from an fdb into a qube
+# Notes
+# Uses fdb --compact
+# Splits by data in order to avoid out of memory problems with fdb --compact
+# Does a bit of processing like removing "year" and "month" keys
+# Might want to add datatypes and reordering of keys there too
+
+import json
+import subprocess
+from datetime import datetime, timedelta
+from time import time
+
+import psutil
+from qubed import Qube
+from tqdm import tqdm
+import requests
+
+process = psutil.Process()
+
+CHUNK_SIZE = timedelta(days=60)
+FILEPATH = "tests/example_qubes/full_dt.json"
+API = "https://qubed.lumi.apps.dte.destination-earth.eu/api/v1"
+
+# Strip surrounding whitespace: a trailing newline in the secret file would
+# otherwise become part of the Authorization header value sent below.
+with open("config/api.secret", "r") as f:
+    secret = f.read().strip()
+
+def ecmwf_date(d):
+    """Return `d` formatted as a compact YYYYMMDD string using `d.strftime`."""
+    return d.strftime("%Y%m%d")
+
+
+start_date = datetime.now() - timedelta(days=120)
+# start_date = datetime(1990, 1, 1)
+# end_date = datetime.now()
+end_date = datetime(2026, 1, 1)
+
+current_span = [end_date - CHUNK_SIZE, end_date]
+
+try:
+    qube = Qube.load(FILEPATH)
+except Exception as e:
+    # Best-effort resume: start from an empty qube when the checkpoint cannot
+    # be loaded, but report why. A bare `except:` would also swallow
+    # KeyboardInterrupt and SystemExit.
+    print(f"Could not load {FILEPATH} ({e!r}), starting from an empty qube")
+    qube = Qube.empty()
+
+def parse_field(field):
+    """Split one `key=v1/v2/...` field of a compact listing line into (key, [values])."""
+    key, values = field.split("=", 1)
+    return key, values.split("/")
+
+
+# Walk backwards in time from end_date in CHUNK_SIZE spans. Every span is
+# listed with each fdb config, merged into `qube`, pushed to the API and then
+# checkpointed to FILEPATH.
+while current_span[0] > start_date:
+    for config in ["config/config-climate-dt.yaml", "config/config-extremes-dt.yaml"]:
+        t0 = time()
+        start, end = map(ecmwf_date, current_span)
+        print(f"Doing {config} {current_span[0].date()} - {current_span[1].date()}")
+        print(f"Current memory usage: {process.memory_info().rss / 1e9:.2g}GB")
+        print(f"{qube.n_nodes = }, {qube.n_leaves = },")
+
+        subqube = Qube.empty()
+        # With shell=True the command is given as a single string.
+        command = f"fdb list --compact --config {config} --minimum-keys=date class=d1,date={start}/{end}"
+        try:
+            p = subprocess.run(
+                command,
+                text=True,
+                shell=True,
+                stderr=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                check=True,
+            )
+        except Exception as e:
+            # Skip this config for this span. The span is advanced after the
+            # config loop, so a failure cannot cause the same span to be
+            # retried forever.
+            print(f"Failed for {current_span} {e}")
+            continue
+
+        print("Got compact list")
+        for line in tqdm(p.stdout.split("\n")):
+            if not line.startswith("retrieve,class="):
+                continue
+
+            # Could do datatypes here
+            request = dict(parse_field(v) for v in line.strip().split(",")[1:])
+            request.pop("year", None)
+            request.pop("month", None)
+            # Could do things like date = year + month + day
+            q = Qube.from_datacube(request)
+            subqube = subqube | q
+        print("added to qube")
+
+        qube = qube | subqube
+        subqube.print(depth=2)
+        print(f"{subqube.n_nodes = }, {subqube.n_leaves = },")
+
+        # NOTE(review): both configs are posted to the climate-dt endpoint;
+        # confirm that is intended for the extremes-dt listing as well.
+        response = requests.post(
+            API + "/union/climate-dt/",
+            headers={"Authorization": f"Bearer {secret}"},
+            json=subqube.to_json(),
+        )
+        if not response.ok:
+            print(f"Upload failed with HTTP {response.status_code}")
+
+        print(
+            f"Did that taking {(time() - t0) / CHUNK_SIZE.days:2g} seconds per day ingested, total {(time() - t0):2g}s"
+        )
+
+    # Checkpoint the accumulated qube once per span.
+    with open(FILEPATH, "w") as f:
+        json.dump(qube.to_json(), f)
+
+    # Step back one chunk only after every config has handled this span.
+    # Advancing inside the config loop made the configs alternate between
+    # spans, so each config saw only every other chunk.
+    current_span = [current_span[0] - CHUNK_SIZE, current_span[0]]
--- a/tests/example_qubes/cads.json
+++ b/tests/example_qubes/cads.json
--- a/tests/example_qubes/climate_dt.json
+++ b/tests/example_qubes/climate_dt.json
--- a/tests/example_qubes/climate_dt_old_schema.json
+++ b/tests/example_qubes/climate_dt_old_schema.json
--- a/tests/example_qubes/extremes_dt.json
+++ b/tests/example_qubes/extremes_dt.json
--- a/tests/example_qubes/od.json
+++ b/tests/example_qubes/od.json
--- a/tests/test_basic_operations.py
+++ b/tests/test_basic_operations.py
@ -1,40 +1,160 @@
 from qubed import Qube

+q = Qube.from_tree("""
+root
+├── class=od
+│   ├── expver=0001
+│   │   ├── param=1
+│   │   └── param=2
+│   └── expver=0002
+│       ├── param=1
+│       └── param=2
+└── class=rd
+    ├── expver=0001
+    │   ├── param=1
+    │   ├── param=2
+    │   └── param=3
+    └── expver=0002
+        ├── param=1
+        └── param=2
+""")

-def test_eq():
-    d = {
-        "class=od" : {
-            "expver=0001": {"param=1":{}, "param=2":{}},
-            "expver=0002": {"param=1":{}, "param=2":{}},
-        },
-        "class=rd" : {
-            "expver=0001": {"param=1":{}, "param=2":{}, "param=3":{}},
-            "expver=0002": {"param=1":{}, "param=2":{}},
-        },
-    }
-    q = Qube.from_dict(d)
-    r = Qube.from_dict(d)

-    assert q == r
+def test_getitem():
+    assert q["class", "od"] == Qube.from_tree("""
+root
+├── expver=0001
+│   ├── param=1
+│   └── param=2
+└── expver=0002
+    ├── param=1
+    └── param=2
+""")
+
+    assert q["class", "od"]["expver", "0001"] == Qube.from_tree("""
+root
+├── param=1
+└── param=2""")
+

 def test_n_leaves():
-    q = Qube.from_dict({
-        "a=1/2/3" : {"b=1/2/3" : {"c=1/2/3" : {}}},
-        "a=5" : {  "b=4" : {  "c=4" : {}}}
-        })
+    q = Qube.from_dict(
+        {"a=1/2/3": {"b=1/2/3": {"c=1/2/3": {}}}, "a=5": {"b=4": {"c=4": {}}}}
+    )

    # Size is 3*3*3 + 1*1*1 = 27 + 1
    assert q.n_leaves == 27 + 1


-# def test_union():
-#         q = Qube.from_dict({"a=1/2/3" : {"b=1" : {}},})
-#         r = Qube.from_dict({"a=2/3/4" : {"b=2" : {}},})
+def test_n_leaves_empty():
+    """An empty qube reports zero leaves."""
+    assert Qube.empty().n_leaves == 0

-#         u = Qube.from_dict({
-#              "a=1" : {"b=1" : {}},
-#              "a=1/2/3" : {"b=1/2" : {}},
-#              "a=4" : {"b=2" : {}},
-#         })

-#         assert q | r == u
+def test_n_nodes_empty():
+    """An empty qube reports zero nodes."""
+    assert Qube.empty().n_nodes == 0
+
+
+def test_union():
+    """Union of two qubes whose `a` values partly overlap.
+
+    The shared values (a=2/3) end up with both `b` values, while the values
+    found in only one operand (a=1, a=4) keep that operand's `b` value.
+    """
+    q = Qube.from_dict(
+        {
+            "a=1/2/3": {"b=1": {}},
+        }
+    )
+    r = Qube.from_dict(
+        {
+            "a=2/3/4": {"b=2": {}},
+        }
+    )
+
+    u = Qube.from_dict(
+        {
+            "a=4": {"b=2": {}},
+            "a=1": {"b=1": {}},
+            "a=2/3": {"b=1/2": {}},
+        }
+    )
+
+    assert q | r == u
+
+
+def test_union_with_empty():
+    """Union with an empty qube leaves the other operand unchanged."""
+    q = Qube.from_dict(
+        {
+            "a=1/2/3": {"b=1": {}},
+        }
+    )
+    assert q | Qube.empty() == q
+
+
+def test_union_2():
+    """Union of two datacubes that share `class` and overlap in `dataset`.
+
+    The expected result groups the datasets by the set of generations they
+    end up with: climate-dt and weather-dt get 1/2/3/4, another-value 1/2/3.
+    """
+    q = Qube.from_datacube(
+        {
+            "class": "d1",
+            "dataset": ["climate-dt", "another-value"],
+            "generation": ["1", "2", "3"],
+        }
+    )
+
+    r = Qube.from_datacube(
+        {
+            "class": "d1",
+            "dataset": ["weather-dt", "climate-dt"],
+            "generation": ["1", "2", "3", "4"],
+        }
+    )
+
+    u = Qube.from_dict(
+        {
+            "class=d1": {
+                "dataset=climate-dt/weather-dt": {
+                    "generation=1/2/3/4": {},
+                },
+                "dataset=another-value": {
+                    "generation=1/2/3": {},
+                },
+            }
+        }
+    )
+
+    assert q | r == u
+
+
+def test_difference():
+    """`q - r` keeps only the `a` values of `q` that are absent from `r`."""
+    q = Qube.from_dict(
+        {
+            "a=1/2/3/5": {"b=1": {}},
+        }
+    )
+    r = Qube.from_dict(
+        {
+            "a=2/3/4": {"b=1": {}},
+        }
+    )
+
+    # Expected difference: a=2/3 are removed, a=4 was never in q.
+    i = Qube.from_dict(
+        {
+            "a=1/5": {"b=1": {}},
+        }
+    )
+
+    assert q - r == i
+
+
+def test_order_independence():
+    """Qubes built from the same entries listed in a different order compare equal."""
+    u = Qube.from_dict(
+        {
+            "a=4": {"b=2": {}},
+            "a=1": {"b=2": {}, "b=1": {}},
+            "a=2/3": {"b=1/2": {}},
+        }
+    )
+
+    # Same content as `u`, with siblings reordered at both levels.
+    v = Qube.from_dict(
+        {
+            "a=2/3": {"b=1/2": {}},
+            "a=4": {"b=2": {}},
+            "a=1": {"b=1": {}, "b=2": {}},
+        }
+    )
+
+    assert u == v
--- a/tests/test_compression.py
+++ b/tests/test_compression.py
@ -0,0 +1,76 @@
+from qubed import Qube
+
+
+def test_smoke():
+    """`compress()` on a fully expanded qube equals the hand-written compact tree."""
+    q = Qube.from_dict(
+        {
+            "class=od": {
+                "expver=0001": {"param=1": {}, "param=2": {}},
+                "expver=0002": {"param=1": {}, "param=2": {}},
+            },
+            "class=rd": {
+                "expver=0001": {"param=1": {}, "param=2": {}, "param=3": {}},
+                "expver=0002": {"param=1": {}, "param=2": {}},
+            },
+        }
+    )
+
+    # Expected form: identical subtrees under class=od are merged, while the
+    # two class=rd expvers stay separate because their params differ.
+    ct = Qube.from_tree("""
+    root
+    ├── class=od, expver=0001/0002, param=1/2
+    └── class=rd
+        ├── expver=0001, param=1/2/3
+        └── expver=0002, param=1/2
+                        """)
+
+    assert q.compress() == ct
+
+
+def test_2():
+    """Three dates with identical subtrees compress into a single datacube."""
+    qube = Qube.from_dict(
+        {
+            "class=d1": {
+                "generation=1": {
+                    "date=20240728": {"time=0600": {"param=8/78/79": {}}},
+                    "date=20240828": {"time=0600": {"param=8/78/79": {}}},
+                    "date=20240928": {"time=0600": {"param=8/78/79": {}}},
+                }
+            }
+        }
+    )
+
+    target = Qube.from_datacube(
+        {
+            "class": "d1",
+            "generation": "1",
+            "date": ["20240728", "20240828", "20240928"],
+            "time": "0600",
+            "param": ["8", "78", "79"],
+        }
+    )
+    assert qube.compress() == target
+
+
+def test_removal_compression():
+    """`remove_by_key(["month"])` yields the same qube as the month-free datacube."""
+    qube = Qube.from_dict(
+        {
+            "class=d1": {
+                "generation=1": {
+                    "month=07": {"date=20240728": {"time=0600": {"param=8/78/79": {}}}},
+                    "month=08": {"date=20240828": {"time=0600": {"param=8/78/79": {}}}},
+                    "month=09": {"date=20240928": {"time=0600": {"param=8/78/79": {}}}},
+                }
+            }
+        }
+    )
+
+    # The target has no "month" level and all three dates in one node.
+    target = Qube.from_datacube(
+        {
+            "class": "d1",
+            "generation": "1",
+            "date": ["20240728", "20240828", "20240928"],
+            "time": "0600",
+            "param": ["8", "78", "79"],
+        }
+    )
+    assert qube.remove_by_key(["month"]) == target
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Tom	165bf5aca2	Tests passing checkpoint	2025-06-03 14:57:27 +02:00
Tom	aaafa28dfb	A bit more on the rust backend	2025-05-29 17:09:17 +02:00
Tom Hodson	3328a0375b	Fix update script a bit	2025-05-23 16:45:37 +00:00
Tom	ba2c67d812	Create example ingestion script	2025-05-23 10:55:32 +01:00
Tom	04b4ee24eb	Silence protobuf warning	2025-05-22 17:26:58 +01:00
Tom	7069b70dd4	remove prints	2025-05-22 14:42:49 +01:00
Tom	90ea736c43	flesh out rust implementation	2025-05-22 14:40:44 +01:00
Tom	959dac332d	Start writing rust backend	2025-05-19 10:20:12 +01:00
Tom	97c5abc38b	Update image link	2025-05-14 10:33:38 +01:00
Tom	1188733034	Update re	2025-05-14 10:21:48 +01:00
Tom	35bb8f0edd	Massive rewrite	2025-05-14 10:14:02 +01:00
Tom	ed4a9055fa	fix bug add testcases	2025-05-12 14:40:16 +01:00
Tom	110046b251	progress on metadata	2025-05-09 17:25:00 +01:00
Tom	a85b700084	Merge branch 'main' into metadata	2025-05-07 15:47:40 +01:00
Tom	271d06c65a	Update banner.svg	2025-05-06 15:16:34 +01:00
Tom	7c28c7023b	Update README.md	2025-05-06 15:16:22 +01:00
Tom	4924fdb804	Add banner	2025-05-06 10:19:11 +01:00
Tom	d246dae54d	Update climate_dt.json	2025-04-30 14:17:15 +02:00
Tom	07f9a24daa	Add require_match argument to select	2025-04-30 14:05:42 +02:00
Tom	b13a06a0cc	Update .gitignore	2025-04-30 14:05:42 +02:00
Tom	87c57ec2cc	add selection test	2025-04-30 14:05:42 +02:00
Tom	a957d26da7	Update README.md	2025-04-30 11:06:48 +02:00
Tom	80b0408722	Update README ulrs	2025-04-24 10:59:42 +01:00
Tom	79e9f83c8c	Add note about live server to docs	2025-04-24 10:54:11 +01:00
Tom	fff00ca6f1	Add test cads.json	2025-04-24 10:30:32 +01:00
Tom	fa646aee77	cosmetics	2025-04-24 10:28:52 +01:00
Tom	e04c0dd3bc	Add tests	2025-04-23 14:40:09 +01:00
Tom	4e777f295d	More work on metadata	2025-04-23 14:40:09 +01:00
Tom	1259ff08b6	first attempt	2025-04-23 14:38:33 +01:00
Tom	7b36a76154	Fix all of mypy's complaints.	2025-04-23 12:43:49 +01:00
Tom	10106ba6d8	Fix link templates	2025-04-17 09:31:51 +01:00
Tom Hodson	2fa99d775c	fix mars language	2025-04-16 16:19:34 +00:00
Tom	b5c2681f63	Quick and dirty simple stac endpoint	2025-04-16 17:05:40 +01:00
Tom	dfc61caa38	Moves installation -> development	2025-04-16 15:20:38 +01:00
Tom	a502cb6ab2	Rejig quickstart a bit	2025-04-16 15:13:06 +01:00
Tom Hodson	ca944521f9	fix	2025-04-15 14:04:52 +00:00
Tom Hodson	6ec4b044b8	fix urls	2025-04-15 14:02:01 +00:00
Tom Hodson	251bec14fc	update webapp	2025-04-15 13:50:03 +00:00
Tom	c3556ce6fa	Fix example code url	2025-04-15 14:43:37 +01:00
Tom	bf47401e6c	Add Qube.load	2025-04-12 16:06:29 +02:00
Tom	11014b07ea	Update climate_dt.json	2025-04-03 14:03:44 +01:00
Tom	b6a27fdadf	Update Qube.py	2025-04-03 14:02:50 +01:00
Tom	70b1fd65e5	add remove_by_key and improve compression	2025-04-02 17:51:20 +01:00
Tom	2e36db4268	Create od.json	2025-03-31 19:11:42 +01:00
Tom	6039a3a494	update cmd app	2025-03-31 19:11:15 +01:00
Tom	7ef930bc1c	Update climate_dt.json	2025-03-31 18:45:40 +01:00
Tom	79983f85a1	clean up chart	2025-03-31 17:24:20 +01:00
Tom	ab2f8cf3f3	Integrate web_query_builder and stac server backend	2025-03-31 16:36:04 +01:00
Tom Hodson	4502a942cb	integrate stac-server and web app	2025-03-31 12:48:13 +00:00
Tom Hodson	3017185950	stac_server	2025-03-31 12:40:23 +00:00
Tom	cd26905261	Update structured_stac.md	2025-03-31 10:37:56 +01:00
Tom	0d3c8248b0	update chart	2025-03-31 10:10:05 +01:00
Tom	b2aba5dd42	update build	2025-03-28 18:08:31 +00:00
Tom	79b97fd813	Update stac server and frontend	2025-03-28 17:50:29 +00:00
Tom	275831d186	Update climate dt schema	2025-03-28 16:32:46 +00:00
Tom	57877e1e0c	Update extremes_dt.json	2025-03-28 09:52:29 +00:00
Tom	cf9db41dc4	add extremes_dt test data	2025-03-28 09:31:26 +00:00
Tom	8f1735c076	Update .gitignore	2025-03-28 09:29:13 +00:00
Tom	39f348244d	Better error message for from_tree	2025-03-27 18:30:39 +00:00
Tom	2884f9fff8	update docs	2025-03-27 18:30:12 +00:00
Tom	df5360f29a	Add convert_dtypes and selection with functions	2025-03-27 16:02:58 +00:00
Tom	d2f3165fe8	Remove print statement	2025-03-25 15:07:06 +00:00
Tom	6b98f7b7a9	Add creation from tree representation	2025-03-25 15:01:23 +00:00
Tom	9beaaa2e10	make consumption by selection off by default	2025-03-24 15:30:34 +00:00
Tom	06c84fb20e	Fix selection bug to require that arguments be consumed by a branch	2025-03-24 15:28:06 +00:00
Tom	c31467fb04	Update fiab.md	2025-03-04 19:18:42 +01:00
Tom	6648502bf4	Add experimental wildcard value	2025-03-04 19:02:55 +01:00
Tom	e14b9ee12f	add extra reqs	2025-03-03 15:50:28 +00:00
Tom	48444cc3ce	Rename Values -> ValueGroup	2025-02-27 16:46:16 +00:00
Tom	8306fb4c3e	Add cmd line app	2025-02-27 16:45:57 +00:00
Tom	68ad80e435	Add pre-commit hooks and run them	2025-02-26 09:11:30 +00:00
Tom	162dd48748	Unfreeze datastructures	2025-02-24 13:24:33 +00:00
Tom	ef844c9b57	Add alt-click copy of nodes paths, flesh out range types	2025-02-24 11:06:11 +00:00
Tom	1f7c5dfecd	Remove notebooks	2025-02-21 10:50:14 +00:00
Tom	a23f366969	add docs requirements	2025-02-21 08:19:59 +00:00
Tom	ecccf336b4	Merge branch 'fiab'	2025-02-20 15:51:13 +00:00
Tom	4c941d34f8	Add fiab docs	2025-02-20 15:51:02 +00:00
Tom	a832e44e03	Add pypi version and wheel	2025-02-20 14:03:54 +00:00
Tom	11516a05ba	Update example_products.md	2025-02-20 13:14:11 +00:00
Tom	8f5b202621	Update example_products.md	2025-02-20 13:14:11 +00:00
Tom	4a16d16748	Create example_products.md	2025-02-20 13:14:11 +00:00
Tom	3de40e46ef	add example structure	2025-02-20 13:14:11 +00:00
Tom	8a2c5b341d	sort pressure levels	2025-02-20 13:14:11 +00:00
Harrison Cook	819c29d768	Add frequency	2025-02-20 13:14:11 +00:00
Tom	ee546cd788	A version that works on windows	2025-02-19 17:22:53 +00:00
Tom	9873241eab	Update update_version.sh	2025-02-19 17:12:23 +00:00
Tom	e432040321	swtich from cargo edit to sed	2025-02-19 17:00:57 +00:00
Tom	2d0c301062	Another try	2025-02-19 16:47:55 +00:00
Tom	52a82447f9	Another attempt	2025-02-19 16:14:24 +00:00
Tom	a70bd9f0cd	Ok another try to get dynamic versions working	2025-02-19 16:04:22 +00:00
Tom	1ca23ca4cf	Add a basic rust test in	2025-02-19 15:08:21 +00:00
Tom	bb61e6fe7c	fix deps	2025-02-19 15:00:38 +00:00
Tom	73dd9a16a8	Make the rust backend non-optional	2025-02-19 14:57:11 +00:00
Tom	967adb1a69	Tidy up package and remove version from cargo.toml	2025-02-19 14:56:22 +00:00
Tom	4bcb09180e	Add .datacubes()	2025-02-19 14:17:47 +00:00
Tom	ea07545dc0	fix set operations	2025-02-18 17:50:28 +00:00
Tom	9d4fcbe624	Set operations done	2025-02-18 07:15:22 +00:00
				`@ -0,0 +1 @@`
				`kubectl -n qubed logs deployment/stac-server`
				`@ -1 +0,0 @@`
				`from .fdb_schema_parser import FDBSchema, FDBSchemaFile, KeySpec, Key`