ipydrawio/notebooks/_meta/Schema Updater.ipynb

541 wiersze
14 KiB
Czysty Zwykły widok Historia

"cells": [
"cell_type": "markdown",
"id": "f544e4e0-4968-4de0-b33c-35b94aa89da5",
"metadata": {},
"source": [
"# Schema Updater\n",
"> This is not a demonstration of functionality, but an iterative notebook for updating the ipydrawio JSON schema when it changes.\n",
"> The specific versions of the files at play are found in `dodo.py`."
"cell_type": "code",
"execution_count": null,
"id": "b115e4b7-61c2-4baf-b841-c383db1edbca",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
2023-04-09 16:46:27 +00:00
"import re\n",
"import subprocess\n",
"import tempfile\n",
2023-04-09 16:46:27 +00:00
"from copy import deepcopy\n",
"from pathlib import Path\n",
"import IPython\n",
"import lxml.etree as ET\n",
"import pandas"
"cell_type": "code",
"execution_count": null,
"id": "e10946b0-d498-4d14-8cd0-da9e7c7b830f",
"metadata": {},
"outputs": [],
"source": [
"# resolved against the kernel's working directory, which is expected to sit\n",
"# two levels below the repository root\n",
"ROOT = Path(\"../..\").resolve()\n",
"assert ROOT.name == \"ipydrawio\", f\"expected the `ipydrawio` checkout, got {ROOT}\"\n",
"assert (ROOT / \".git\").exists(), f\"{ROOT} is not the root of a git checkout\""
"cell_type": "markdown",
"id": "96c17607-554d-4b50-b506-55bd51497604",
"metadata": {},
"source": [
"The user settings schema in lab is the source of truth for many elements of the "
"cell_type": "code",
"execution_count": null,
"id": "e035378e-3b3c-4eb1-9fa4-ccc5d0ab83ed",
"metadata": {},
"outputs": [],
"source": [
"SCHEMA_PATH = ROOT / \"packages/ipydrawio/schema/plugin.json\"\n",
"SCHEMA = json.loads(SCHEMA_PATH.read_text(encoding=\"utf-8\"))\n",
"cell_type": "markdown",
"id": "eb44b16b-98c1-4baf-ada3-7caaecfa13ac",
"metadata": {},
"source": [
"This is a continuous check as to whether git is happy"
"cell_type": "code",
"execution_count": null,
"id": "11c57ef8-70b3-4f7e-bc47-a7fb821dd304",
"metadata": {},
"outputs": [],
"source": [
"# Round-trip the schema through json + prettier, so that any diff reported\n",
"# below comes from content changes rather than from formatting.\n",
"SCHEMA_PATH.write_text(json.dumps(SCHEMA, indent=2, sort_keys=True), encoding=\"utf-8\")\n",
"!jlpm prettier --write \"{SCHEMA_PATH}\"\n",
"# plain `git diff` exits 0 whether or not the file changed; `--exit-code` makes\n",
"# it exit 1 on a diff, so the message is only printed for an unchanged file\n",
"!git diff --exit-code \"{SCHEMA_PATH}\" && echo 'no change'"
"cell_type": "code",
"execution_count": null,
"id": "cea18c8c-63ea-476c-bfc9-b586c651e20f",
"metadata": {},
"outputs": [],
"source": [
"paths = [*(ROOT / \"build/fetched\").glob(\"*.html\")]\n",
"cell_type": "markdown",
"id": "b607c248-f4fb-46a9-811d-611126ca5508",
"metadata": {},
"source": [
"# The URL Params"
"cell_type": "code",
"execution_count": null,
"id": "a45709f3-de69-4d57-bc63-2e04f14d13ac",
"metadata": {},
"outputs": [],
"source": [
"h_url_params = paths[0]\n",
2023-04-09 16:46:27 +00:00
" f\"/files/{h_url_params}\",\n",
" height='600px\" style=\"height: 100%;',\n",
" width=\"100%\",\n",
"cell_type": "code",
"execution_count": null,
"id": "2bd5c728-346b-451d-88ab-3f4a0b29de69",
"metadata": {},
"outputs": [],
"source": [
"x_url_params = ET.HTML(h_url_params.read_bytes())\n",
"cell_type": "code",
"execution_count": null,
"id": "4c59d50d-dd52-4dab-a790-0d7543f9db63",
"metadata": {},
"outputs": [],
"source": [
"li_elements = x_url_params.xpath(\"//li/code[contains(text(), '=')]\")\n",
"li = li_elements[0]"
"cell_type": "markdown",
"id": "42a22e6f-c4af-4430-895b-9a5b22c01190",
"metadata": {},
"source": [
"We're interested in the context around the definition."
"cell_type": "code",
"execution_count": null,
"id": "77a3c450-0637-4141-b3e3-660f5a389899",
"metadata": {},
"outputs": [],
"source": [
"ET.tostring(li.getparent(), pretty_print=True, encoding=str)"
"cell_type": "markdown",
"id": "6248c5ec-4fb2-4dc9-9931-93e505eb1dc6",
"metadata": {},
"source": [
"# Clean up the description\n",
"The raw XML is not very nice to embed in JSON. So many double quotes!"
"cell_type": "code",
"execution_count": null,
"id": "32a00d47-2a0a-421e-9d06-35a6389c57bb",
"metadata": {},
"outputs": [],
"source": [
"def one_md_desc(li):\n",
"    \"\"\"Build a one-line markdown description from the element around ``li``.\n",
"\n",
"    The parent of ``li`` is serialized, wrapped in ``<ul>`` and converted from\n",
"    HTML to CommonMark by ``pandoc``, using files in a temporary directory.\n",
"    The result is joined onto a single line, markdown links are reduced to\n",
"    their emphasized link text, and the ``li.text`` code span followed by a\n",
"    colon is replaced by the part of ``li.text`` after the ``=``.\n",
"\n",
"    Prints ``li.text`` and the resulting description as a side effect.\n",
"\n",
"    Returns the description string.\n",
"    Raises ``subprocess.CalledProcessError`` if ``pandoc`` exits non-zero.\n",
"    \"\"\"\n",
"    with tempfile.TemporaryDirectory() as td:\n",
"        tdp = Path(td)\n",
"        html = tdp / \"test1.html\"\n",
"        md = tdp / \"test1.md\"\n",
"        # pandoc reads and writes UTF-8, so never fall back to the locale encoding\n",
"        html.write_text(\n",
"            f\"<ul>{ET.tostring(li.getparent(), encoding=str, pretty_print=True)}</ul>\",\n",
"            encoding=\"utf-8\",\n",
"        )\n",
"        args = [\"pandoc\", html, \"-f\", \"html\", \"-t\", \"commonmark\", \"-s\", \"-o\", md]\n",
"        subprocess.check_call([*map(str, args)])\n",
"        body = \" \".join(\n",
"            [line.strip() for line in md.read_text(encoding=\"utf-8\").splitlines()]\n",
"        )\n",
"    # The link text must be matched non-greedily: the whole description is on one\n",
"    # line, so a greedy match would swallow everything between the first `[` and\n",
"    # the last `](`. The `[2:]` drops the first two characters (presumably the\n",
"    # `- ` list marker written by pandoc).\n",
"    body = re.sub(r\"\\[(.*?)\\]\\((.*?)\\)\", r\"_\\1_\", body)[2:].strip()\n",
"    print(f\"`{li.text}`\")\n",
"    body = body.replace(f\"`{li.text}`:\", f\"{li.text.split('=')[1]}:\")\n",
"    print(\"\\t\", body)\n",
"    return body\n",
2023-04-09 16:46:27 +00:00
" f\"\"\"> _this is the description of **`{li.text.split(\"=\")[0]}`**_\\n> ```md\\n{one_md_desc(li)}\\n```\"\"\",\n",
"cell_type": "code",
"execution_count": null,
"id": "d42e55c7-dd33-4e45-934d-107414387f23",
"metadata": {},
"outputs": [],
"source": [
"url_params = {\n",
" x.text.split(\"=\")[0]: {\"description\": one_md_desc(x)}\n",
" for x in li_elements\n",
2023-04-09 16:46:27 +00:00
" if \"&\" not in x.text\n",
"cell_type": "code",
"execution_count": null,
"id": "17de503f-a228-4491-b341-35ea1773961d",
"metadata": {},
"outputs": [],
"source": [
"old_url_params = SCHEMA[\"definitions\"][\"drawio-url-params\"][\"properties\"]\n",
"cell_type": "markdown",
"id": "6414f799-914e-42f8-9659-afd6183cb98f",
"metadata": {},
"source": [
"# What is No Longer Documented?"
"cell_type": "code",
"execution_count": null,
"id": "33f5e886-1e78-45bd-bf77-30089295937f",
"metadata": {},
"outputs": [],
"source": [
"not_documented = {*old_url_params} - {*url_params}\n",
"cell_type": "markdown",
"id": "a58a22c3-be84-4ee9-8f63-29ef118ac667",
"metadata": {},
"source": [
"# What have we Not Modeled?"
"cell_type": "code",
"execution_count": null,
"id": "c105ece5-ab6e-4924-a32d-6b481b8a431d",
"metadata": {},
"outputs": [],
"source": [
"not_modeled = {*url_params} - {*old_url_params}\n",
"IPython.display.JSON({k: url_params[k] for k in not_modeled})"
"cell_type": "markdown",
"id": "975f301b-5c66-4764-a40f-cfa8ba3e4fd1",
"metadata": {},
"source": [
"Let's get some refs, this is kind of icky."
"cell_type": "code",
"execution_count": null,
"id": "c19945a4-0ac4-4aa6-ab6a-d4d7e2470d51",
"metadata": {},
"outputs": [],
"source": [
" url_params[ok].update({attr: ov[attr]})\n",
" for attr in [\"$ref\", \"format\", \"pattern\", \"type\", \"minimum\"]\n",
" for ok, ov in old_url_params.items()\n",
" if ok in url_params and attr in ov\n",
"cell_type": "markdown",
"id": "cb0c3e61-039b-4321-9c8a-f3d6066f9d95",
"metadata": {},
"source": [
"# It's time to treat this like data."
"cell_type": "code",
"execution_count": null,
"id": "8af96050-0c65-4f20-b3ce-84ca52e0402d",
"metadata": {},
"outputs": [],
"source": [
"df = pandas.DataFrame([{\"id\": k, **v} for k, v in url_params.items()])\n",
"df = df.fillna(\"\").sort_values(\"id\").set_index([\"id\"])\n",
"cell_type": "markdown",
"id": "4f1656c5-0a1c-420c-8186-e31b28bf3abf",
"metadata": {},
"source": [
"# Let's update the old with the new."
"cell_type": "code",
"execution_count": null,
"id": "8e97e69c-760f-410c-beee-0f6cf3d95bcc",
"metadata": {},
"outputs": [],
"source": [
2023-04-09 16:46:27 +00:00
"df_old = (\n",
" pandas.DataFrame(\n",
" [\n",
" {\"id\": k, **v}\n",
" for k, v in SCHEMA[\"definitions\"][\"drawio-url-params\"][\"properties\"].items()\n",
" ],\n",
" )\n",
" .sort_values(\"id\")\n",
" .set_index([\"id\"])\n",
" .fillna(\"\")\n",
"cell_type": "code",
"execution_count": null,
"id": "d1d6a4a0-7c63-4fff-a80a-993fde31e5aa",
"metadata": {},
"outputs": [],
"source": [
"def clean_df(df):\n",
"    \"\"\"Convert ``df`` into a ``{id: {column: value}}`` mapping.\n",
"\n",
"    The index is reset first, so ``id`` has to be available as a column\n",
"    afterwards. For every row only the columns holding a non-blank string are\n",
"    kept; empty strings and all non-string values are left out.\n",
"    \"\"\"\n",
"    cleaned = {}\n",
"    for row in df.reset_index().to_dict(orient=\"records\"):\n",
"        row_id = row[\"id\"]\n",
"        fields = {}\n",
"        for column, value in row.items():\n",
"            if column == \"id\":\n",
"                continue\n",
"            if value and isinstance(value, str) and value.strip():\n",
"                fields[column] = value\n",
"        cleaned[row_id] = fields\n",
"    return cleaned"
"cell_type": "code",
"execution_count": null,
"id": "d00e2403-c0da-40ec-9b9c-15a56bec89e9",
"metadata": {},
"outputs": [],
"source": [
"cleaned_df = clean_df(df)\n",
"cleaned_df_old = clean_df(df_old)"
"cell_type": "code",
"execution_count": null,
"id": "278e5ca5-3910-417f-ae37-ae8490e3667c",
"metadata": {},
"outputs": [],
"source": [
"# version stamped onto entries that exist on only one side of the merge\n",
"# NOTE(review): presumably the drawio version being imported; confirm against `dodo.py`\n",
"VERSION_TAG = \"14.6.10\"\n",
"\n",
"\n",
"def _tagged(entry, label):\n",
"    \"\"\"Copy ``entry`` and append a bold ``label`` + version note to its description, if any.\"\"\"\n",
"    tagged = {**entry}\n",
"    if \"description\" in tagged:\n",
"        tagged[\"description\"] += f\" **{label} `{VERSION_TAG}`**\"\n",
"    return tagged\n",
"\n",
"\n",
"# handle symmetric difference of keys\n",
"new_df = {}\n",
"# only in the old schema: no longer found in the freshly parsed params\n",
"for k, v in cleaned_df_old.items():\n",
"    if k not in cleaned_df:\n",
"        new_df[k] = _tagged(v, \"TBD\")\n",
"# only in the freshly parsed params: not in the old schema yet\n",
"for k, v in cleaned_df.items():\n",
"    if k not in cleaned_df_old:\n",
"        new_df[k] = _tagged(v, \"NEW\")\n",
"# on both sides: start from the old entry and let the new values win\n",
"for k in sorted({*cleaned_df_old, *cleaned_df}):\n",
"    if k in new_df:\n",
"        continue\n",
"    new_df[k] = {**cleaned_df_old[k], **cleaned_df[k]}\n",
"# new_df"
"cell_type": "markdown",
"id": "7315f427-3b89-4c6c-9918-0a9e23c55049",
"metadata": {},
"source": [
"# One serious gotcha"
"cell_type": "code",
"execution_count": null,
"id": "69893051-02f6-4940-a3e3-72d49ed24f3c",
"metadata": {},
"outputs": [],
"source": [
2023-04-09 16:46:27 +00:00
"before_i18n, after_i18n = new_df[\"lang\"][\"description\"].split(\"*i18n*,\")\n",
"locales = sorted(\n",
" [\n",
" lang.replace(\"`\", \"\").strip().split(\":\")[0].strip()\n",
" for lang in new_df[\"lang\"][\"description\"].split(\"*i18n*,\")[1].split(\",\")\n",
" ],\n",
"cell_type": "code",
"execution_count": null,
"id": "2284b798-01d7-4a65-92f7-985bdbf36e37",
"metadata": {},
"outputs": [],
"source": [
2023-04-09 16:46:27 +00:00
"new_df[\"lang\"][\"description\"] = f\"\"\"{before_i18n} *i18n* {\", \".join(locales)}\"\"\"\n",
"new_df[\"lang\"][\"examples\"] = locales"
"cell_type": "markdown",
"id": "9b64ecf4-da25-44e2-8014-d2572bcf0cd4",
"metadata": {},
"source": [
"# Fill in some new guesses"
"cell_type": "code",
"execution_count": null,
"id": "cdda3d64-4b23-480a-9c8c-063b16ed890e",
"metadata": {},
"outputs": [],
"source": [
2023-04-09 16:46:27 +00:00
"# point each of these params at the shared off-switch definition\n",
"for param in (\"configure\", \"pwa\", \"returnbounds\", \"svg-warning\"):\n",
"    new_df[param][\"$ref\"] = \"#/definitions/off-switch\""
"cell_type": "code",
"execution_count": null,
"id": "47e7f2cc-a564-46ba-b5d2-86409ca234b9",
"metadata": {},
"outputs": [],
"source": [
2023-04-09 16:46:27 +00:00
"new_df[\"hide-pages\"][\"$ref\"] = \"#/definitions/on-off-switch\""
"cell_type": "code",
"execution_count": null,
"id": "8857785d-75c8-4521-a8b2-db78a4450957",
"metadata": {},
"outputs": [],
"source": [
2023-04-09 16:46:27 +00:00
"new_df[\"math-output\"][\"examples\"] = [\"html\"]"
"cell_type": "code",
"execution_count": null,
"id": "5d52fb20-0081-40f0-8cd4-1aa35bacae25",
"metadata": {},
"outputs": [],
"source": [
2023-04-09 16:46:27 +00:00
"new_df[\"ui\"][\"examples\"] = [\"min\", \"atlas\", \"dark\", \"sketch\"]"
"cell_type": "code",
"execution_count": null,
"id": "9f4dfe17-2638-45f1-b476-7ce12c59aa28",
"metadata": {},
"outputs": [],
"source": [
"new_df[\"search-shapes\"][\"type\"] = \"string\""
"cell_type": "markdown",
"id": "afacc8ae-0f39-41af-a621-78b4a07ff7f1",
"metadata": {},
"source": [
"# Ok, let's ship it."
"cell_type": "code",
"execution_count": null,
"id": "983dc291-57b2-4250-a604-8f77e96fa526",
"metadata": {
"tags": []
"outputs": [],
"source": [
"NEW_SCHEMA = deepcopy(SCHEMA)\n",
"NEW_SCHEMA[\"definitions\"][\"drawio-url-params\"][\"properties\"] = new_df\n",
"NEW_SCHEMA_PATH = SCHEMA_PATH.parent / f\"{SCHEMA_PATH.name}.proposed.json\"\n",
2023-04-09 16:46:27 +00:00
" json.dumps(NEW_SCHEMA, indent=2, sort_keys=True),\n",
" encoding=\"utf-8\",\n",
"!jlpm --silent prettier --write {ROOT}/packages/ipydrawio/schema/plugin.json.proposed.json\n",
"!git diff --color-words --no-index {ROOT}/packages/ipydrawio/schema/plugin.json {ROOT}/packages/ipydrawio/schema/plugin.json.proposed.json"
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
"nbformat": 4,
"nbformat_minor": 5