{ "cells": [ { "cell_type": "markdown", "id": "f544e4e0-4968-4de0-b33c-35b94aa89da5", "metadata": {}, "source": [ "# Schema Updater\n", "\n", "> This is not a demonstration of functionality, but an iterative notebook for updating the ipydrawio JSON schema, when it changes.\n", "> The specific version of the files at play are found in `dodo.py`" ] }, { "cell_type": "code", "execution_count": null, "id": "b115e4b7-61c2-4baf-b841-c383db1edbca", "metadata": {}, "outputs": [], "source": [ "import json\n", "import re\n", "import subprocess\n", "import tempfile\n", "from copy import deepcopy\n", "from pathlib import Path\n", "\n", "import IPython\n", "import lxml.etree as ET\n", "import pandas" ] }, { "cell_type": "code", "execution_count": null, "id": "e10946b0-d498-4d14-8cd0-da9e7c7b830f", "metadata": {}, "outputs": [], "source": [ "ROOT = Path(\"../..\").resolve()\n", "assert ROOT.name == \"ipydrawio\"\n", "assert (ROOT / \".git\").exists()" ] }, { "cell_type": "markdown", "id": "96c17607-554d-4b50-b506-55bd51497604", "metadata": {}, "source": [ "The user settings schema in lab is the source of truth for many elements of the " ] }, { "cell_type": "code", "execution_count": null, "id": "e035378e-3b3c-4eb1-9fa4-ccc5d0ab83ed", "metadata": {}, "outputs": [], "source": [ "SCHEMA_PATH = ROOT / \"packages/ipydrawio/schema/plugin.json\"\n", "SCHEMA = json.loads(SCHEMA_PATH.read_text(encoding=\"utf-8\"))\n", "IPython.display.JSON(SCHEMA)" ] }, { "cell_type": "markdown", "id": "eb44b16b-98c1-4baf-ada3-7caaecfa13ac", "metadata": {}, "source": [ "This is a continuous check as to whether git is happy" ] }, { "cell_type": "code", "execution_count": null, "id": "11c57ef8-70b3-4f7e-bc47-a7fb821dd304", "metadata": {}, "outputs": [], "source": [ "SCHEMA_PATH.write_text(json.dumps(SCHEMA, indent=2, sort_keys=True), encoding=\"utf-8\")\n", "!jlpm prettier --write {ROOT}/packages/ipydrawio/schema/plugin.json\n", "!git diff {ROOT}/packages/ipydrawio/schema/plugin.json || echo 'no change'" ] }, { "cell_type": "code", "execution_count": null, "id": "cea18c8c-63ea-476c-bfc9-b586c651e20f", "metadata": {}, "outputs": [], "source": [ "paths = [*(ROOT / \"build/fetched\").glob(\"*.html\")]\n", "paths" ] }, { "cell_type": "markdown", "id": "b607c248-f4fb-46a9-811d-611126ca5508", "metadata": {}, "source": [ "# The URL Params" ] }, { "cell_type": "code", "execution_count": null, "id": "a45709f3-de69-4d57-bc63-2e04f14d13ac", "metadata": {}, "outputs": [], "source": [ "h_url_params = paths[0]\n", "IPython.display.IFrame(\n", " f\"/files/{h_url_params}\",\n", " height='600px\" style=\"height: 100%;',\n", " width=\"100%\",\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "2bd5c728-346b-451d-88ab-3f4a0b29de69", "metadata": {}, "outputs": [], "source": [ "x_url_params = ET.HTML(h_url_params.read_bytes())\n", "x_url_params" ] }, { "cell_type": "code", "execution_count": null, "id": "4c59d50d-dd52-4dab-a790-0d7543f9db63", "metadata": {}, "outputs": [], "source": [ "li_elements = x_url_params.xpath(\"//li/code[contains(text(), '=')]\")\n", "li = li_elements[0]" ] }, { "cell_type": "markdown", "id": "42a22e6f-c4af-4430-895b-9a5b22c01190", "metadata": {}, "source": [ "We're interested in the context around the definition." ] }, { "cell_type": "code", "execution_count": null, "id": "77a3c450-0637-4141-b3e3-660f5a389899", "metadata": {}, "outputs": [], "source": [ "ET.tostring(li.getparent(), pretty_print=True, encoding=str)" ] }, { "cell_type": "markdown", "id": "6248c5ec-4fb2-4dc9-9931-93e505eb1dc6", "metadata": {}, "source": [ "# Clean up the description\n", "\n", "The raw XML is not very nice to embed in JSON. So many double quotes!" ] }, { "cell_type": "code", "execution_count": null, "id": "32a00d47-2a0a-421e-9d06-35a6389c57bb", "metadata": {}, "outputs": [], "source": [ "def one_md_desc(li):\n", " with tempfile.TemporaryDirectory() as td:\n", " tdp = Path(td)\n", " html = tdp / \"test1.html\"\n", " md = tdp / \"test1.md\"\n", " html.write_text(\n", " f\"\",\n", " )\n", " args = [\"pandoc\", html, \"-f\", \"html\", \"-t\", \"commonmark\", \"-s\", \"-o\", md]\n", " subprocess.check_call([*map(str, args)])\n", " body = \" \".join([line.strip() for line in md.read_text().splitlines()])\n", " body = re.sub(r\"\\[(.*)?\\]\\((.*?)\\)\", r\"_\\1_\", body)[2:].strip()\n", " print(f\"`{li.text}`\")\n", " body = body.replace(f\"`{li.text}`:\", f\"{li.text.split('=')[1]}:\")\n", " print(\"\\t\", body)\n", " return body\n", "\n", "\n", "IPython.display.Markdown(\n", " f\"\"\"> _this is the description of **`{li.text.split(\"=\")[0]}`**_\\n> ```md\\n{one_md_desc(li)}\\n```\"\"\",\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "d42e55c7-dd33-4e45-934d-107414387f23", "metadata": {}, "outputs": [], "source": [ "url_params = {\n", " x.text.split(\"=\")[0]: {\"description\": one_md_desc(x)}\n", " for x in li_elements\n", " if \"&\" not in x.text\n", "}\n", "IPython.display.JSON(url_params)" ] }, { "cell_type": "code", "execution_count": null, "id": "17de503f-a228-4491-b341-35ea1773961d", "metadata": {}, "outputs": [], "source": [ "old_url_params = SCHEMA[\"definitions\"][\"drawio-url-params\"][\"properties\"]\n", "IPython.display.JSON(old_url_params)" ] }, { "cell_type": "markdown", "id": "6414f799-914e-42f8-9659-afd6183cb98f", "metadata": {}, "source": [ "# What is No Longer Documented?" ] }, { "cell_type": "code", "execution_count": null, "id": "33f5e886-1e78-45bd-bf77-30089295937f", "metadata": {}, "outputs": [], "source": [ "not_documented = {*old_url_params} - {*url_params}\n", "not_documented" ] }, { "cell_type": "markdown", "id": "a58a22c3-be84-4ee9-8f63-29ef118ac667", "metadata": {}, "source": [ "# What have we Not Modeled?" ] }, { "cell_type": "code", "execution_count": null, "id": "c105ece5-ab6e-4924-a32d-6b481b8a431d", "metadata": {}, "outputs": [], "source": [ "not_modeled = {*url_params} - {*old_url_params}\n", "IPython.display.JSON({k: url_params[k] for k in not_modeled})" ] }, { "cell_type": "markdown", "id": "975f301b-5c66-4764-a40f-cfa8ba3e4fd1", "metadata": {}, "source": [ "Let's get some refs, this is kind of icky." ] }, { "cell_type": "code", "execution_count": null, "id": "c19945a4-0ac4-4aa6-ab6a-d4d7e2470d51", "metadata": {}, "outputs": [], "source": [ "{\n", " url_params[ok].update({attr: ov[attr]})\n", " for attr in [\"$ref\", \"format\", \"pattern\", \"type\", \"minimum\"]\n", " for ok, ov in old_url_params.items()\n", " if ok in url_params and attr in ov\n", "}" ] }, { "cell_type": "markdown", "id": "cb0c3e61-039b-4321-9c8a-f3d6066f9d95", "metadata": {}, "source": [ "# It's time to treat this like data." ] }, { "cell_type": "code", "execution_count": null, "id": "8af96050-0c65-4f20-b3ce-84ca52e0402d", "metadata": {}, "outputs": [], "source": [ "df = pandas.DataFrame([{\"id\": k, **v} for k, v in url_params.items()])\n", "df = df.fillna(\"\").sort_values(\"id\").set_index([\"id\"])\n", "df" ] }, { "cell_type": "markdown", "id": "4f1656c5-0a1c-420c-8186-e31b28bf3abf", "metadata": {}, "source": [ "# Let's update the old with the new." ] }, { "cell_type": "code", "execution_count": null, "id": "8e97e69c-760f-410c-beee-0f6cf3d95bcc", "metadata": {}, "outputs": [], "source": [ "df_old = (\n", " pandas.DataFrame(\n", " [\n", " {\"id\": k, **v}\n", " for k, v in SCHEMA[\"definitions\"][\"drawio-url-params\"][\"properties\"].items()\n", " ],\n", " )\n", " .sort_values(\"id\")\n", " .set_index([\"id\"])\n", " .fillna(\"\")\n", ")\n", "df_old" ] }, { "cell_type": "code", "execution_count": null, "id": "d1d6a4a0-7c63-4fff-a80a-993fde31e5aa", "metadata": {}, "outputs": [], "source": [ "def clean_df(df):\n", " return {\n", " record[\"id\"]: {\n", " rk: rv\n", " for rk, rv in record.items()\n", " if rk not in [\"id\"] and rv and isinstance(rv, str) and rv.strip()\n", " }\n", " for record in df.reset_index().to_dict(orient=\"records\")\n", " }" ] }, { "cell_type": "code", "execution_count": null, "id": "d00e2403-c0da-40ec-9b9c-15a56bec89e9", "metadata": {}, "outputs": [], "source": [ "cleaned_df = clean_df(df)\n", "cleaned_df_old = clean_df(df_old)" ] }, { "cell_type": "code", "execution_count": null, "id": "278e5ca5-3910-417f-ae37-ae8490e3667c", "metadata": {}, "outputs": [], "source": [ "# handle symmetric difference of keys\n", "\n", "new_df = {}\n", "\n", "for k, v in cleaned_df_old.items():\n", " if k not in cleaned_df:\n", " v = {**v}\n", " if \"description\" in v:\n", " v[\"description\"] += \" **TBD `14.6.10`**\"\n", " new_df[k] = v\n", "for k, v in cleaned_df.items():\n", " if k not in cleaned_df_old:\n", " v = {**v}\n", " if \"description\" in v:\n", " v[\"description\"] += \" **NEW `14.6.10`**\"\n", " new_df[k] = v\n", "for k in sorted({*cleaned_df_old, *cleaned_df}):\n", " if k in new_df:\n", " continue\n", " new_df[k] = {**cleaned_df_old[k]}\n", " new_df[k].update(cleaned_df[k])\n", "# new_df" ] }, { "cell_type": "markdown", "id": "7315f427-3b89-4c6c-9918-0a9e23c55049", "metadata": {}, "source": [ "# One serious gotcha" ] }, { "cell_type": "code", "execution_count": null, "id": "69893051-02f6-4940-a3e3-72d49ed24f3c", "metadata": {}, "outputs": [], "source": [ "before_i18n, after_i18n = new_df[\"lang\"][\"description\"].split(\"*i18n*,\")\n", "locales = sorted(\n", " [\n", " lang.replace(\"`\", \"\").strip().split(\":\")[0].strip()\n", " for lang in new_df[\"lang\"][\"description\"].split(\"*i18n*,\")[1].split(\",\")\n", " ],\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "2284b798-01d7-4a65-92f7-985bdbf36e37", "metadata": {}, "outputs": [], "source": [ "new_df[\"lang\"][\"description\"] = f\"\"\"{before_i18n} *i18n* {\", \".join(locales)}\"\"\"\n", "new_df[\"lang\"][\"examples\"] = locales" ] }, { "cell_type": "markdown", "id": "9b64ecf4-da25-44e2-8014-d2572bcf0cd4", "metadata": {}, "source": [ "# Fill in some new guesses" ] }, { "cell_type": "code", "execution_count": null, "id": "cdda3d64-4b23-480a-9c8c-063b16ed890e", "metadata": {}, "outputs": [], "source": [ "new_df[\"configure\"][\"$ref\"] = new_df[\"pwa\"][\"$ref\"] = new_df[\"returnbounds\"][\n", " \"$ref\"\n", "] = new_df[\"svg-warning\"][\"$ref\"] = \"#/definitions/off-switch\"" ] }, { "cell_type": "code", "execution_count": null, "id": "47e7f2cc-a564-46ba-b5d2-86409ca234b9", "metadata": {}, "outputs": [], "source": [ "new_df[\"hide-pages\"][\"$ref\"] = \"#/definitions/on-off-switch\"" ] }, { "cell_type": "code", "execution_count": null, "id": "8857785d-75c8-4521-a8b2-db78a4450957", "metadata": {}, "outputs": [], "source": [ "new_df[\"math-output\"][\"examples\"] = [\"html\"]" ] }, { "cell_type": "code", "execution_count": null, "id": "5d52fb20-0081-40f0-8cd4-1aa35bacae25", "metadata": {}, "outputs": [], "source": [ "new_df[\"ui\"][\"examples\"] = [\"min\", \"atlas\", \"dark\", \"sketch\"]" ] }, { "cell_type": "code", "execution_count": null, "id": "9f4dfe17-2638-45f1-b476-7ce12c59aa28", "metadata": {}, "outputs": [], "source": [ "new_df[\"search-shapes\"][\"type\"] = \"string\"" ] }, { "cell_type": "markdown", "id": "afacc8ae-0f39-41af-a621-78b4a07ff7f1", "metadata": {}, "source": [ "# Ok, let's ship it." ] }, { "cell_type": "code", "execution_count": null, "id": "983dc291-57b2-4250-a604-8f77e96fa526", "metadata": { "tags": [] }, "outputs": [], "source": [ "NEW_SCHEMA = deepcopy(SCHEMA)\n", "NEW_SCHEMA[\"definitions\"][\"drawio-url-params\"][\"properties\"] = new_df\n", "NEW_SCHEMA_PATH = SCHEMA_PATH.parent / f\"{SCHEMA_PATH.name}.proposed.json\"\n", "NEW_SCHEMA_PATH.write_text(\n", " json.dumps(NEW_SCHEMA, indent=2, sort_keys=True),\n", " encoding=\"utf-8\",\n", ")\n", "!jlpm --silent prettier --write {ROOT}/packages/ipydrawio/schema/plugin.json.proposed.json\n", "!git diff --color-words --no-index {ROOT}/packages/ipydrawio/schema/plugin.json {ROOT}/packages/ipydrawio/schema/plugin.json.proposed.json" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } }, "nbformat": 4, "nbformat_minor": 5 }