# Schema Updater

> This is not a demonstration of functionality, but an iterative notebook for updating the ipydrawio JSON schema when it changes.
> The specific versions of the files at play are defined in `dodo.py`.

In [None]:
import json
import re
import subprocess
import tempfile
from copy import deepcopy
from pathlib import Path

import IPython
import lxml.etree as ET
import pandas

In [None]:
# Resolve the repository root (two levels above the current working
# directory) and stop right away if the notebook runs from somewhere else.
ROOT = Path("../..").resolve()
GIT_MARKER = ROOT.joinpath(".git")
assert "ipydrawio" == ROOT.name
assert GIT_MARKER.exists()

The user settings schema in lab is the source of truth for many elements of the drawio integration, including the URL parameters updated below.

In [None]:
# Read the plugin schema shipped with the ipydrawio package and show it as an
# interactive JSON tree.
SCHEMA_PATH = ROOT / "packages/ipydrawio/schema/plugin.json"
with SCHEMA_PATH.open(encoding="utf-8") as schema_file:
    SCHEMA = json.load(schema_file)
IPython.display.JSON(SCHEMA)

This is a continuous check as to whether git is happy

In [None]:
SCHEMA_PATH.write_text(json.dumps(SCHEMA, indent=2, sort_keys=True), encoding="utf-8")
!jlpm prettier --write {ROOT}/packages/ipydrawio/schema/plugin.json
!git diff {ROOT}/packages/ipydrawio/schema/plugin.json || echo 'no change'

In [None]:
# Collect the fetched HTML pages. ``Path.glob`` yields entries in an
# arbitrary, platform-dependent order, so sort them: a later cell picks
# ``paths[0]`` and has to see the same file on every run.
paths = sorted((ROOT / "build/fetched").glob("*.html"))
# Fail here with a clear message rather than with an IndexError further down.
assert paths, f"no fetched HTML pages found in {ROOT / 'build/fetched'}"
paths

# The URL Params

In [None]:
# Preview the first fetched page inline through a ``/files/`` URL.
h_url_params = paths[0]
# NOTE(review): the height value presumably closes the attribute early to
# inject an inline style -- confirm against the markup IFrame generates.
url_params_frame = IPython.display.IFrame(
    src=f"/files/{h_url_params}",
    width="100%",
    height='600px" style="height: 100%;',
)
url_params_frame

In [None]:
# Parse the page's raw bytes into an element tree with lxml's HTML parser.
raw_html = h_url_params.read_bytes()
x_url_params = ET.HTML(raw_html)
x_url_params

In [None]:
# Select every ``<code>`` element that sits directly inside an ``<li>`` and
# whose text contains ``=``; keep the first one as a working sample.
# Despite its name, ``li`` is the ``<code>`` element, not the list item.
PARAM_XPATH = "//li/code[contains(text(), '=')]"
li_elements = x_url_params.xpath(PARAM_XPATH)
li = li_elements[0]

We're interested in the context around the definition.

In [None]:
# Serialize the element enclosing the sample, markup included.
sample_parent = li.getparent()
ET.tostring(sample_parent, encoding=str, pretty_print=True)

# Clean up the description

The raw XML is not very nice to embed in JSON. So many double quotes!

In [None]:
def one_md_desc(li):
    """Render the documentation around one parameter as a single Markdown line.

    The element enclosing ``li`` is written to a temporary HTML file and
    converted to CommonMark with the ``pandoc`` executable. The result is
    collapsed onto one line, Markdown links are reduced to their emphasized
    link text, and the leading ``name=value`` code span is shortened to
    ``value``.

    Args:
        li: an lxml element whose ``text`` looks like ``name=value`` and whose
            parent holds the full description.

    Returns:
        The cleaned description. It is also printed, together with the
        parameter text, as a progress trace.

    Raises:
        subprocess.CalledProcessError: if ``pandoc`` exits with an error.
    """
    # NOTE(review): the document handed to pandoc was an empty string, so the
    # description never reached the converter. The enclosing element is
    # wrapped in a list here so that pandoc emits a "- " bullet, which the
    # ``[2:]`` slice below removes -- confirm the wrapper against real output.
    fragment = ET.tostring(li.getparent(), encoding=str)
    with tempfile.TemporaryDirectory() as td:
        tdp = Path(td)
        html = tdp / "test1.html"
        md = tdp / "test1.md"
        # Write and read as UTF-8 explicitly instead of relying on the
        # platform default, matching how the schema files are handled.
        html.write_text(
            f"<html><body><ul>{fragment}</ul></body></html>",
            encoding="utf-8",
        )
        args = ["pandoc", html, "-f", "html", "-t", "commonmark", "-s", "-o", md]
        subprocess.check_call([*map(str, args)])
        body = " ".join(
            line.strip() for line in md.read_text(encoding="utf-8").splitlines()
        )
    # Character classes keep each match inside one ``[text](target)`` pair; the
    # previous greedy ``(.*)?`` swallowed everything between the first "[" and
    # the last link on lines that contain several links.
    body = re.sub(r"\[([^\]]*)\]\([^)]*\)", r"_\1_", body)[2:].strip()
    print(f"`{li.text}`")
    body = body.replace(f"`{li.text}`:", f"{li.text.split('=')[1]}:")
    print("\t", body)
    return body


# Show the cleaned description of the sample parameter as a quoted
# Markdown snippet.
sample_name = li.text.split("=")[0]
sample_desc = one_md_desc(li)
IPython.display.Markdown(
    f"> _this is the description of **`{sample_name}`**_\n> ```md\n{sample_desc}\n```",
)

In [None]:
# Build one schema fragment per documented parameter, keyed by the text in
# front of ``=``. Entries whose text contains ``&`` are skipped.
url_params = {}
for code_el in li_elements:
    if "&" in code_el.text:
        continue
    url_params[code_el.text.split("=")[0]] = {"description": one_md_desc(code_el)}
IPython.display.JSON(url_params)

In [None]:
# The URL parameters as the schema currently models them.
url_params_definition = SCHEMA["definitions"]["drawio-url-params"]
old_url_params = url_params_definition["properties"]
IPython.display.JSON(old_url_params)

# What is No Longer Documented?

In [None]:
# Parameter names present in the schema but absent from the scraped page.
not_documented = set(old_url_params).difference(url_params)
not_documented

# What have we Not Modeled?

In [None]:
# Parameter names found on the scraped page that the schema lacks.
not_modeled = set(url_params).difference(old_url_params)
IPython.display.JSON({name: url_params[name] for name in not_modeled})

Let's get some refs, this is kind of icky.

In [None]:
# Copy selected attributes from the existing schema entries onto the scraped
# entries of the same name. This is done purely for its side effect on
# ``url_params``, so it is an explicit loop rather than a comprehension that
# would build (and display) a meaningless ``{None}``.
CARRIED_ATTRS = ["$ref", "format", "pattern", "type", "minimum"]
for ok, ov in old_url_params.items():
    if ok not in url_params:
        continue
    url_params[ok].update({attr: ov[attr] for attr in CARRIED_ATTRS if attr in ov})

# It's time to treat this like data.

In [None]:
# One row per scraped parameter, indexed by its name; missing cells become "".
df = (
    pandas.DataFrame([{"id": name, **spec} for name, spec in url_params.items()])
    .fillna("")
    .sort_values("id")
    .set_index(["id"])
)
df

# Let's update the old with the new.

In [None]:
# The same tabular view, built from the entries already in the schema.
old_records = [
    {"id": name, **spec}
    for name, spec in SCHEMA["definitions"]["drawio-url-params"]["properties"].items()
]
df_old = pandas.DataFrame(old_records).sort_values("id").set_index(["id"]).fillna("")
df_old

In [None]:
def clean_df(df):
    """Convert a frame indexed by ``id`` into ``{id: {field: value}}``.

    Only fields whose value is a string with non-whitespace content are kept.
    NOTE(review): this also drops non-string values such as numbers -- confirm
    that is intended.
    """
    cleaned = {}
    for record in df.reset_index().to_dict(orient="records"):
        fields = {}
        for rk, rv in record.items():
            if rk in ["id"]:
                continue
            if not (rv and isinstance(rv, str) and rv.strip()):
                continue
            fields[rk] = rv
        cleaned[record["id"]] = fields
    return cleaned

In [None]:
# Reduce both tables to plain dictionaries of their non-empty string fields.
cleaned_df, cleaned_df_old = (clean_df(frame) for frame in (df, df_old))

In [None]:
# Merge the old and the scraped definitions. Names found on only one side
# are copied and their description is tagged; names found on both sides
# start from the old entry and are overridden by the scraped fields.
new_df = {}

# Only in the old schema: no longer documented.
for name, old_entry in cleaned_df_old.items():
    if name in cleaned_df:
        continue
    entry = dict(old_entry)
    if "description" in entry:
        entry["description"] += " **TBD `14.6.10`**"
    new_df[name] = entry

# Only in the scraped docs: not modeled yet.
for name, scraped_entry in cleaned_df.items():
    if name in cleaned_df_old:
        continue
    entry = dict(scraped_entry)
    if "description" in entry:
        entry["description"] += " **NEW `14.6.10`**"
    new_df[name] = entry

# On both sides: everything not handled above, in sorted order.
for name in sorted(cleaned_df_old.keys() & cleaned_df.keys()):
    new_df[name] = {**cleaned_df_old[name], **cleaned_df[name]}

# One serious gotcha

In [None]:
# Split the ``lang`` description at the ``*i18n*,`` marker. The part after it
# is a comma-separated list; for each item keep the text in front of the
# first ``:`` with backticks removed.
before_i18n, after_i18n = new_df["lang"]["description"].split("*i18n*,")
locales = [
    entry.replace("`", "").strip().split(":")[0].strip()
    for entry in after_i18n.split(",")
]
locales.sort()

In [None]:
# Rewrite the description with the tidied, sorted locale list and offer the
# same locales as examples.
locale_list = ", ".join(locales)
new_df["lang"].update(
    description=f"{before_i18n} *i18n* {locale_list}",
    examples=locales,
)

# Fill in some new guesses

In [None]:
# Guess: these parameters all point at the shared off-switch definition.
for name in ("configure", "pwa", "returnbounds", "svg-warning"):
    new_df[name]["$ref"] = "#/definitions/off-switch"

In [None]:
# Guess: ``hide-pages`` points at the shared on/off-switch definition.
new_df["hide-pages"].update({"$ref": "#/definitions/on-off-switch"})

In [None]:
# Guess: offer ``html`` as an example value for ``math-output``.
new_df["math-output"].update(examples=["html"])

In [None]:
# Guess: example values offered for ``ui``.
UI_EXAMPLES = ["min", "atlas", "dark", "sketch"]
new_df["ui"]["examples"] = UI_EXAMPLES

In [None]:
# Guess: ``search-shapes`` is typed as a string.
new_df["search-shapes"].update(type="string")

# Ok, let's ship it.

In [None]:
NEW_SCHEMA = deepcopy(SCHEMA)
NEW_SCHEMA["definitions"]["drawio-url-params"]["properties"] = new_df
NEW_SCHEMA_PATH = SCHEMA_PATH.parent / f"{SCHEMA_PATH.name}.proposed.json"
NEW_SCHEMA_PATH.write_text(
 json.dumps(NEW_SCHEMA, indent=2, sort_keys=True),
 encoding="utf-8",
)
!jlpm --silent prettier --write {ROOT}/packages/ipydrawio/schema/plugin.json.proposed.json
!git diff --color-words --no-index {ROOT}/packages/ipydrawio/schema/plugin.json {ROOT}/packages/ipydrawio/schema/plugin.json.proposed.json