Add new `--internal internal.db` option, deprecate legacy `_internal` database

Refs:
- #2157 
---------

Co-authored-by: Simon Willison <swillison@gmail.com>
pull/2165/head
Alex Garcia 2023-08-28 20:24:23 -07:00 zatwierdzone przez GitHub
rodzic d28f12092d
commit 92b8bf38c0
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
13 zmienionych plików z 108 dodań i 90 usunięć

Wyświetl plik

@ -256,6 +256,7 @@ class Datasette:
pdb=False,
crossdb=False,
nolock=False,
internal=None,
):
self._startup_invoked = False
assert config_dir is None or isinstance(
@ -304,17 +305,18 @@ class Datasette:
self.add_database(
Database(self, is_mutable=False, is_memory=True), name="_memory"
)
# memory_name is a random string so that each Datasette instance gets its own
# unique in-memory named database - otherwise unit tests can fail with weird
# errors when different instances accidentally share an in-memory database
self.add_database(
Database(self, memory_name=secrets.token_hex()), name="_internal"
)
self.internal_db_created = False
for file in self.files:
self.add_database(
Database(self, file, is_mutable=file not in self.immutables)
)
self.internal_db_created = False
if internal is None:
self._internal_database = Database(self, memory_name=secrets.token_hex())
else:
self._internal_database = Database(self, path=internal, mode="rwc")
self._internal_database.name = "__INTERNAL__"
self.cache_headers = cache_headers
self.cors = cors
config_files = []
@ -436,15 +438,14 @@ class Datasette:
await self._refresh_schemas()
async def _refresh_schemas(self):
internal_db = self.databases["_internal"]
internal_db = self.get_internal_database()
if not self.internal_db_created:
await init_internal_db(internal_db)
self.internal_db_created = True
current_schema_versions = {
row["database_name"]: row["schema_version"]
for row in await internal_db.execute(
"select database_name, schema_version from databases"
"select database_name, schema_version from core_databases"
)
}
for database_name, db in self.databases.items():
@ -459,7 +460,7 @@ class Datasette:
values = [database_name, db.is_memory, schema_version]
await internal_db.execute_write(
"""
INSERT OR REPLACE INTO databases (database_name, path, is_memory, schema_version)
INSERT OR REPLACE INTO core_databases (database_name, path, is_memory, schema_version)
VALUES {}
""".format(
placeholders
@ -554,8 +555,7 @@ class Datasette:
raise KeyError
return matches[0]
if name is None:
# Return first database that isn't "_internal"
name = [key for key in self.databases.keys() if key != "_internal"][0]
name = [key for key in self.databases.keys()][0]
return self.databases[name]
def add_database(self, db, name=None, route=None):
@ -655,6 +655,9 @@ class Datasette:
def _metadata(self):
return self.metadata()
def get_internal_database(self):
    """Return the internal Database instance used by Datasette for its own state.

    This is either an in-memory database (the default) or a persistent
    on-disk database when the ``--internal`` option was supplied.
    """
    return self._internal_database
def plugin_config(self, plugin_name, database=None, table=None, fallback=True):
"""Return config for plugin, falling back from specified database/table"""
plugins = self.metadata(
@ -978,7 +981,6 @@ class Datasette:
"hash": d.hash,
}
for name, d in self.databases.items()
if name != "_internal"
]
def _versions(self):

Wyświetl plik

@ -148,9 +148,6 @@ async def inspect_(files, sqlite_extensions):
app = Datasette([], immutables=files, sqlite_extensions=sqlite_extensions)
data = {}
for name, database in app.databases.items():
if name == "_internal":
# Don't include the in-memory _internal database
continue
counts = await database.table_counts(limit=3600 * 1000)
data[name] = {
"hash": database.hash,
@ -476,6 +473,11 @@ def uninstall(packages, yes):
"--ssl-certfile",
help="SSL certificate file",
)
@click.option(
"--internal",
type=click.Path(),
help="Path to a persistent Datasette internal SQLite database",
)
def serve(
files,
immutable,
@ -507,6 +509,7 @@ def serve(
nolock,
ssl_keyfile,
ssl_certfile,
internal,
return_instance=False,
):
"""Serve up specified SQLite database files with a web UI"""
@ -570,6 +573,7 @@ def serve(
pdb=pdb,
crossdb=crossdb,
nolock=nolock,
internal=internal,
)
# if files is a single directory, use that as config_dir=

Wyświetl plik

@ -29,7 +29,13 @@ AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file"))
class Database:
def __init__(
self, ds, path=None, is_mutable=True, is_memory=False, memory_name=None
self,
ds,
path=None,
is_mutable=True,
is_memory=False,
memory_name=None,
mode=None,
):
self.name = None
self.route = None
@ -50,6 +56,7 @@ class Database:
self._write_connection = None
# This is used to track all file connections so they can be closed
self._all_file_connections = []
self.mode = mode
@property
def cached_table_counts(self):
@ -90,6 +97,7 @@ class Database:
return conn
if self.is_memory:
return sqlite3.connect(":memory:", uri=True)
# mode=ro or immutable=1?
if self.is_mutable:
qs = "?mode=ro"
@ -100,6 +108,8 @@ class Database:
assert not (write and not self.is_mutable)
if write:
qs = ""
if self.mode is not None:
qs = f"?mode={self.mode}"
conn = sqlite3.connect(
f"file:{self.path}{qs}", uri=True, check_same_thread=False
)

Wyświetl plik

@ -146,8 +146,6 @@ async def _resolve_metadata_view_permissions(datasette, actor, action, resource)
if allow is not None:
return actor_matches_allow(actor, allow)
elif action == "view-database":
if resource == "_internal" and (actor is None or actor.get("id") != "root"):
return False
database_allow = datasette.metadata("allow", database=resource)
if database_allow is None:
return None

Wyświetl plik

@ -5,13 +5,13 @@ from datasette.utils import table_column_details
async def init_internal_db(db):
create_tables_sql = textwrap.dedent(
"""
CREATE TABLE IF NOT EXISTS databases (
CREATE TABLE IF NOT EXISTS core_databases (
database_name TEXT PRIMARY KEY,
path TEXT,
is_memory INTEGER,
schema_version INTEGER
);
CREATE TABLE IF NOT EXISTS tables (
CREATE TABLE IF NOT EXISTS core_tables (
database_name TEXT,
table_name TEXT,
rootpage INTEGER,
@ -19,7 +19,7 @@ async def init_internal_db(db):
PRIMARY KEY (database_name, table_name),
FOREIGN KEY (database_name) REFERENCES databases(database_name)
);
CREATE TABLE IF NOT EXISTS columns (
CREATE TABLE IF NOT EXISTS core_columns (
database_name TEXT,
table_name TEXT,
cid INTEGER,
@ -33,7 +33,7 @@ async def init_internal_db(db):
FOREIGN KEY (database_name) REFERENCES databases(database_name),
FOREIGN KEY (database_name, table_name) REFERENCES tables(database_name, table_name)
);
CREATE TABLE IF NOT EXISTS indexes (
CREATE TABLE IF NOT EXISTS core_indexes (
database_name TEXT,
table_name TEXT,
seq INTEGER,
@ -45,7 +45,7 @@ async def init_internal_db(db):
FOREIGN KEY (database_name) REFERENCES databases(database_name),
FOREIGN KEY (database_name, table_name) REFERENCES tables(database_name, table_name)
);
CREATE TABLE IF NOT EXISTS foreign_keys (
CREATE TABLE IF NOT EXISTS core_foreign_keys (
database_name TEXT,
table_name TEXT,
id INTEGER,
@ -69,12 +69,16 @@ async def populate_schema_tables(internal_db, db):
database_name = db.name
def delete_everything(conn):
conn.execute("DELETE FROM tables WHERE database_name = ?", [database_name])
conn.execute("DELETE FROM columns WHERE database_name = ?", [database_name])
conn.execute("DELETE FROM core_tables WHERE database_name = ?", [database_name])
conn.execute(
"DELETE FROM foreign_keys WHERE database_name = ?", [database_name]
"DELETE FROM core_columns WHERE database_name = ?", [database_name]
)
conn.execute(
"DELETE FROM core_foreign_keys WHERE database_name = ?", [database_name]
)
conn.execute(
"DELETE FROM core_indexes WHERE database_name = ?", [database_name]
)
conn.execute("DELETE FROM indexes WHERE database_name = ?", [database_name])
await internal_db.execute_write_fn(delete_everything)
@ -133,14 +137,14 @@ async def populate_schema_tables(internal_db, db):
await internal_db.execute_write_many(
"""
INSERT INTO tables (database_name, table_name, rootpage, sql)
INSERT INTO core_tables (database_name, table_name, rootpage, sql)
values (?, ?, ?, ?)
""",
tables_to_insert,
)
await internal_db.execute_write_many(
"""
INSERT INTO columns (
INSERT INTO core_columns (
database_name, table_name, cid, name, type, "notnull", default_value, is_pk, hidden
) VALUES (
:database_name, :table_name, :cid, :name, :type, :notnull, :default_value, :is_pk, :hidden
@ -150,7 +154,7 @@ async def populate_schema_tables(internal_db, db):
)
await internal_db.execute_write_many(
"""
INSERT INTO foreign_keys (
INSERT INTO core_foreign_keys (
database_name, table_name, "id", seq, "table", "from", "to", on_update, on_delete, match
) VALUES (
:database_name, :table_name, :id, :seq, :table, :from, :to, :on_update, :on_delete, :match
@ -160,7 +164,7 @@ async def populate_schema_tables(internal_db, db):
)
await internal_db.execute_write_many(
"""
INSERT INTO indexes (
INSERT INTO core_indexes (
database_name, table_name, seq, name, "unique", origin, partial
) VALUES (
:database_name, :table_name, :seq, :name, :unique, :origin, :partial

Wyświetl plik

@ -950,9 +950,9 @@ class TableCreateView(BaseView):
async def _table_columns(datasette, database_name):
internal = datasette.get_database("_internal")
result = await internal.execute(
"select table_name, name from columns where database_name = ?",
internal_db = datasette.get_internal_database()
result = await internal_db.execute(
"select table_name, name from core_columns where database_name = ?",
[database_name],
)
table_columns = {}

Wyświetl plik

@ -238,7 +238,7 @@ class CreateTokenView(BaseView):
# Build list of databases and tables the user has permission to view
database_with_tables = []
for database in self.ds.databases.values():
if database.name in ("_internal", "_memory"):
if database.name == "_memory":
continue
if not await self.ds.permission_allowed(
request.actor, "view-database", database.name

Wyświetl plik

@ -134,6 +134,8 @@ Once started you can access it at ``http://localhost:8001``
mode
--ssl-keyfile TEXT SSL key file
--ssl-certfile TEXT SSL certificate file
--internal PATH Path to a persistent Datasette internal SQLite
database
--help Show this message and exit.

Wyświetl plik

@ -271,7 +271,7 @@ Property exposing a ``collections.OrderedDict`` of databases currently connected
The dictionary keys are the name of the database that is used in the URL - e.g. ``/fixtures`` would have a key of ``"fixtures"``. The values are :ref:`internals_database` instances.
All databases are listed, irrespective of user permissions. This means that the ``_internal`` database will always be listed here.
All databases are listed, irrespective of user permissions.
.. _datasette_permissions:
@ -479,6 +479,13 @@ The following example creates a token that can access ``view-instance`` and ``vi
Returns the specified database object. Raises a ``KeyError`` if the database does not exist. Call this method without an argument to return the first connected database.
.. _get_internal_database:
.get_internal_database()
------------------------
Returns a database object for reading and writing to the private :ref:`internal database <internals_internal>`.
.. _datasette_add_database:
.add_database(db, name=None, route=None)
@ -1127,19 +1134,21 @@ You can selectively disable CSRF protection using the :ref:`plugin_hook_skip_csr
.. _internals_internal:
The _internal database
======================
Datasette's internal database
=============================
.. warning::
This API should be considered unstable - the structure of these tables may change prior to the release of Datasette 1.0.
Datasette maintains an "internal" SQLite database used for configuration, caching, and storage. Plugins can store configuration, settings, and other data inside this database. By default, Datasette will use a temporary in-memory SQLite database as the internal database, which is created at startup and destroyed at shutdown. Users of Datasette can optionally pass in a `--internal` flag to specify the path to a SQLite database to use as the internal database, which will persist internal data across Datasette instances.
Datasette maintains an in-memory SQLite database with details of the databases, tables and columns for all of the attached databases.
The internal database is not exposed in the Datasette application by default, which means private data can safely be stored without worry of accidentally leaking information through the default Datasette interface and API. However, other plugins do have full read and write access to the internal database.
By default all actors are denied access to the ``view-database`` permission for the ``_internal`` database, so the database is not visible to anyone unless they :ref:`sign in as root <authentication_root>`.
Plugins can access this database by calling ``internal_db = datasette.get_internal_database()`` and then executing queries using the :ref:`Database API <internals_database>`.
Plugins can access this database by calling ``db = datasette.get_database("_internal")`` and then executing queries using the :ref:`Database API <internals_database>`.
Plugin authors are asked to practice good etiquette when using the internal database, as all plugins use the same database to store data. For example:
You can explore an example of this database by `signing in as root <https://latest.datasette.io/login-as-root>`__ to the ``latest.datasette.io`` demo instance and then navigating to `latest.datasette.io/_internal <https://latest.datasette.io/_internal>`__.
1. Use a unique prefix when creating tables, indices, and triggers in the internal database. If your plugin is called `datasette-xyz`, then prefix names with `datasette_xyz_*`.
2. Avoid long-running write statements that may stall or block other plugins that are trying to write at the same time.
3. Use temporary tables or shared in-memory attached databases when possible.
4. Avoid implementing features that could expose private data stored in the internal database by other plugins.
.. _internals_utils:

Wyświetl plik

@ -120,7 +120,7 @@ def permission_allowed(datasette, actor, action):
assert (
2
== (
await datasette.get_database("_internal").execute("select 1 + 1")
await datasette.get_internal_database().execute("select 1 + 1")
).first()[0]
)
if action == "this_is_allowed_async":
@ -142,7 +142,8 @@ def startup(datasette):
async def inner():
# Run against _internal so tests that use the ds_client fixture
# (which has no databases yet on startup) do not fail:
result = await datasette.get_database("_internal").execute("select 1 + 1")
internal_db = datasette.get_internal_database()
result = await internal_db.execute("select 1 + 1")
datasette._startup_hook_calculation = result.first()[0]
return inner

Wyświetl plik

@ -154,6 +154,7 @@ def test_metadata_yaml():
ssl_keyfile=None,
ssl_certfile=None,
return_instance=True,
internal=None,
)
client = _TestClient(ds)
response = client.get("/-/metadata.json")
@ -368,3 +369,14 @@ def test_help_settings():
result = runner.invoke(cli, ["--help-settings"])
for setting in SETTINGS:
assert setting.name in result.output
def test_internal_db(tmpdir):
    """The --internal option should create a persistent internal DB file on disk."""
    db_path = tmpdir / "internal.db"
    assert not db_path.exists()
    cli_runner = CliRunner()
    result = cli_runner.invoke(
        cli,
        ["--memory", "--internal", str(db_path), "--get", "/"],
    )
    # Serving one request must succeed and must have created the file
    assert result.exit_code == 0
    assert db_path.exists()

Wyświetl plik

@ -1,55 +1,35 @@
import pytest
@pytest.mark.asyncio
async def test_internal_only_available_to_root(ds_client):
    # NOTE(review): this exercises the legacy "_internal" named database route,
    # which other hunks in this change appear to remove — confirm this test
    # is still wanted after the --internal refactor
    cookie = ds_client.actor_cookie({"id": "root"})
    # Anonymous visitors are denied access to the _internal database
    assert (await ds_client.get("/_internal")).status_code == 403
    # The root actor is allowed to view it
    assert (
        await ds_client.get("/_internal", cookies={"ds_actor": cookie})
    ).status_code == 200
# ensure refresh_schemas() gets called before interacting with internal_db
async def ensure_internal(ds_client):
    """Trigger a request so refresh_schemas() runs, then return the internal DB.

    Any page hit causes Datasette to create and populate the core_* tables
    in the internal database, which these tests then query directly.
    """
    await ds_client.get("/fixtures.json?sql=select+1")
    return ds_client.ds.get_internal_database()
@pytest.mark.asyncio
async def test_internal_databases(ds_client):
    """core_databases should list exactly the attached databases.

    The visible span fused the pre-change test body (HTTP fetch of the removed
    ``/_internal`` route, unused ``cookie``, ``databases`` assigned twice) with
    the post-change body because diff markers were stripped; this is the
    reconstructed post-change version that queries the internal DB directly.
    """
    internal_db = await ensure_internal(ds_client)
    databases = await internal_db.execute("select * from core_databases")
    # Only the fixtures database is attached; the internal DB does not list itself
    assert len(databases) == 1
    assert databases.rows[0]["database_name"] == "fixtures"
@pytest.mark.asyncio
async def test_internal_tables(ds_client):
    """core_tables rows should carry the expected schema-describing columns.

    The visible span fused the pre-change test body (unused ``cookie``, a second
    fetch via the removed ``/_internal`` JSON API, ``table`` assigned twice)
    with the post-change body; this is the reconstructed post-change version.
    """
    internal_db = await ensure_internal(ds_client)
    tables = await internal_db.execute("select * from core_tables")
    # fixtures has many tables, so well over five rows should be present
    assert len(tables) > 5
    table = tables.rows[0]
    assert set(table.keys()) == {"rootpage", "table_name", "database_name", "sql"}
@pytest.mark.asyncio
async def test_internal_indexes(ds_client):
cookie = ds_client.actor_cookie({"id": "root"})
indexes = (
await ds_client.get(
"/_internal/indexes.json?_shape=array", cookies={"ds_actor": cookie}
)
).json()
internal_db = await ensure_internal(ds_client)
indexes = await internal_db.execute("select * from core_indexes")
assert len(indexes) > 5
index = indexes[0]
index = indexes.rows[0]
assert set(index.keys()) == {
"partial",
"name",
@ -63,14 +43,10 @@ async def test_internal_indexes(ds_client):
@pytest.mark.asyncio
async def test_internal_foreign_keys(ds_client):
cookie = ds_client.actor_cookie({"id": "root"})
foreign_keys = (
await ds_client.get(
"/_internal/foreign_keys.json?_shape=array", cookies={"ds_actor": cookie}
)
).json()
internal_db = await ensure_internal(ds_client)
foreign_keys = await internal_db.execute("select * from core_foreign_keys")
assert len(foreign_keys) > 5
foreign_key = foreign_keys[0]
foreign_key = foreign_keys.rows[0]
assert set(foreign_key.keys()) == {
"table",
"seq",

Wyświetl plik

@ -329,7 +329,7 @@ def test_hook_extra_body_script(app_client, path, expected_extra_body_script):
@pytest.mark.asyncio
async def test_hook_asgi_wrapper(ds_client):
response = await ds_client.get("/fixtures")
assert "_internal, fixtures" == response.headers["x-databases"]
assert "fixtures" == response.headers["x-databases"]
def test_hook_extra_template_vars(restore_working_directory):