Add new `--internal internal.db` option, deprecate legacy `_internal` database

Refs:
- #2157 
---------

Co-authored-by: Simon Willison <swillison@gmail.com>
pull/2165/head
Alex Garcia 2023-08-28 20:24:23 -07:00 zatwierdzone przez GitHub
rodzic d28f12092d
commit 92b8bf38c0
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
13 zmienionych plików z 108 dodań i 90 usunięć

Wyświetl plik

@ -256,6 +256,7 @@ class Datasette:
pdb=False, pdb=False,
crossdb=False, crossdb=False,
nolock=False, nolock=False,
internal=None,
): ):
self._startup_invoked = False self._startup_invoked = False
assert config_dir is None or isinstance( assert config_dir is None or isinstance(
@ -304,17 +305,18 @@ class Datasette:
self.add_database( self.add_database(
Database(self, is_mutable=False, is_memory=True), name="_memory" Database(self, is_mutable=False, is_memory=True), name="_memory"
) )
# memory_name is a random string so that each Datasette instance gets its own
# unique in-memory named database - otherwise unit tests can fail with weird
# errors when different instances accidentally share an in-memory database
self.add_database(
Database(self, memory_name=secrets.token_hex()), name="_internal"
)
self.internal_db_created = False
for file in self.files: for file in self.files:
self.add_database( self.add_database(
Database(self, file, is_mutable=file not in self.immutables) Database(self, file, is_mutable=file not in self.immutables)
) )
self.internal_db_created = False
if internal is None:
self._internal_database = Database(self, memory_name=secrets.token_hex())
else:
self._internal_database = Database(self, path=internal, mode="rwc")
self._internal_database.name = "__INTERNAL__"
self.cache_headers = cache_headers self.cache_headers = cache_headers
self.cors = cors self.cors = cors
config_files = [] config_files = []
@ -436,15 +438,14 @@ class Datasette:
await self._refresh_schemas() await self._refresh_schemas()
async def _refresh_schemas(self): async def _refresh_schemas(self):
internal_db = self.databases["_internal"] internal_db = self.get_internal_database()
if not self.internal_db_created: if not self.internal_db_created:
await init_internal_db(internal_db) await init_internal_db(internal_db)
self.internal_db_created = True self.internal_db_created = True
current_schema_versions = { current_schema_versions = {
row["database_name"]: row["schema_version"] row["database_name"]: row["schema_version"]
for row in await internal_db.execute( for row in await internal_db.execute(
"select database_name, schema_version from databases" "select database_name, schema_version from core_databases"
) )
} }
for database_name, db in self.databases.items(): for database_name, db in self.databases.items():
@ -459,7 +460,7 @@ class Datasette:
values = [database_name, db.is_memory, schema_version] values = [database_name, db.is_memory, schema_version]
await internal_db.execute_write( await internal_db.execute_write(
""" """
INSERT OR REPLACE INTO databases (database_name, path, is_memory, schema_version) INSERT OR REPLACE INTO core_databases (database_name, path, is_memory, schema_version)
VALUES {} VALUES {}
""".format( """.format(
placeholders placeholders
@ -554,8 +555,7 @@ class Datasette:
raise KeyError raise KeyError
return matches[0] return matches[0]
if name is None: if name is None:
# Return first database that isn't "_internal" name = [key for key in self.databases.keys()][0]
name = [key for key in self.databases.keys() if key != "_internal"][0]
return self.databases[name] return self.databases[name]
def add_database(self, db, name=None, route=None): def add_database(self, db, name=None, route=None):
@ -655,6 +655,9 @@ class Datasette:
def _metadata(self): def _metadata(self):
return self.metadata() return self.metadata()
def get_internal_database(self):
return self._internal_database
def plugin_config(self, plugin_name, database=None, table=None, fallback=True): def plugin_config(self, plugin_name, database=None, table=None, fallback=True):
"""Return config for plugin, falling back from specified database/table""" """Return config for plugin, falling back from specified database/table"""
plugins = self.metadata( plugins = self.metadata(
@ -978,7 +981,6 @@ class Datasette:
"hash": d.hash, "hash": d.hash,
} }
for name, d in self.databases.items() for name, d in self.databases.items()
if name != "_internal"
] ]
def _versions(self): def _versions(self):

Wyświetl plik

@ -148,9 +148,6 @@ async def inspect_(files, sqlite_extensions):
app = Datasette([], immutables=files, sqlite_extensions=sqlite_extensions) app = Datasette([], immutables=files, sqlite_extensions=sqlite_extensions)
data = {} data = {}
for name, database in app.databases.items(): for name, database in app.databases.items():
if name == "_internal":
# Don't include the in-memory _internal database
continue
counts = await database.table_counts(limit=3600 * 1000) counts = await database.table_counts(limit=3600 * 1000)
data[name] = { data[name] = {
"hash": database.hash, "hash": database.hash,
@ -476,6 +473,11 @@ def uninstall(packages, yes):
"--ssl-certfile", "--ssl-certfile",
help="SSL certificate file", help="SSL certificate file",
) )
@click.option(
"--internal",
type=click.Path(),
help="Path to a persistent Datasette internal SQLite database",
)
def serve( def serve(
files, files,
immutable, immutable,
@ -507,6 +509,7 @@ def serve(
nolock, nolock,
ssl_keyfile, ssl_keyfile,
ssl_certfile, ssl_certfile,
internal,
return_instance=False, return_instance=False,
): ):
"""Serve up specified SQLite database files with a web UI""" """Serve up specified SQLite database files with a web UI"""
@ -570,6 +573,7 @@ def serve(
pdb=pdb, pdb=pdb,
crossdb=crossdb, crossdb=crossdb,
nolock=nolock, nolock=nolock,
internal=internal,
) )
# if files is a single directory, use that as config_dir= # if files is a single directory, use that as config_dir=

Wyświetl plik

@ -29,7 +29,13 @@ AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file"))
class Database: class Database:
def __init__( def __init__(
self, ds, path=None, is_mutable=True, is_memory=False, memory_name=None self,
ds,
path=None,
is_mutable=True,
is_memory=False,
memory_name=None,
mode=None,
): ):
self.name = None self.name = None
self.route = None self.route = None
@ -50,6 +56,7 @@ class Database:
self._write_connection = None self._write_connection = None
# This is used to track all file connections so they can be closed # This is used to track all file connections so they can be closed
self._all_file_connections = [] self._all_file_connections = []
self.mode = mode
@property @property
def cached_table_counts(self): def cached_table_counts(self):
@ -90,6 +97,7 @@ class Database:
return conn return conn
if self.is_memory: if self.is_memory:
return sqlite3.connect(":memory:", uri=True) return sqlite3.connect(":memory:", uri=True)
# mode=ro or immutable=1? # mode=ro or immutable=1?
if self.is_mutable: if self.is_mutable:
qs = "?mode=ro" qs = "?mode=ro"
@ -100,6 +108,8 @@ class Database:
assert not (write and not self.is_mutable) assert not (write and not self.is_mutable)
if write: if write:
qs = "" qs = ""
if self.mode is not None:
qs = f"?mode={self.mode}"
conn = sqlite3.connect( conn = sqlite3.connect(
f"file:{self.path}{qs}", uri=True, check_same_thread=False f"file:{self.path}{qs}", uri=True, check_same_thread=False
) )

Wyświetl plik

@ -146,8 +146,6 @@ async def _resolve_metadata_view_permissions(datasette, actor, action, resource)
if allow is not None: if allow is not None:
return actor_matches_allow(actor, allow) return actor_matches_allow(actor, allow)
elif action == "view-database": elif action == "view-database":
if resource == "_internal" and (actor is None or actor.get("id") != "root"):
return False
database_allow = datasette.metadata("allow", database=resource) database_allow = datasette.metadata("allow", database=resource)
if database_allow is None: if database_allow is None:
return None return None

Wyświetl plik

@ -5,13 +5,13 @@ from datasette.utils import table_column_details
async def init_internal_db(db): async def init_internal_db(db):
create_tables_sql = textwrap.dedent( create_tables_sql = textwrap.dedent(
""" """
CREATE TABLE IF NOT EXISTS databases ( CREATE TABLE IF NOT EXISTS core_databases (
database_name TEXT PRIMARY KEY, database_name TEXT PRIMARY KEY,
path TEXT, path TEXT,
is_memory INTEGER, is_memory INTEGER,
schema_version INTEGER schema_version INTEGER
); );
CREATE TABLE IF NOT EXISTS tables ( CREATE TABLE IF NOT EXISTS core_tables (
database_name TEXT, database_name TEXT,
table_name TEXT, table_name TEXT,
rootpage INTEGER, rootpage INTEGER,
@ -19,7 +19,7 @@ async def init_internal_db(db):
PRIMARY KEY (database_name, table_name), PRIMARY KEY (database_name, table_name),
FOREIGN KEY (database_name) REFERENCES databases(database_name) FOREIGN KEY (database_name) REFERENCES databases(database_name)
); );
CREATE TABLE IF NOT EXISTS columns ( CREATE TABLE IF NOT EXISTS core_columns (
database_name TEXT, database_name TEXT,
table_name TEXT, table_name TEXT,
cid INTEGER, cid INTEGER,
@ -33,7 +33,7 @@ async def init_internal_db(db):
FOREIGN KEY (database_name) REFERENCES databases(database_name), FOREIGN KEY (database_name) REFERENCES databases(database_name),
FOREIGN KEY (database_name, table_name) REFERENCES tables(database_name, table_name) FOREIGN KEY (database_name, table_name) REFERENCES tables(database_name, table_name)
); );
CREATE TABLE IF NOT EXISTS indexes ( CREATE TABLE IF NOT EXISTS core_indexes (
database_name TEXT, database_name TEXT,
table_name TEXT, table_name TEXT,
seq INTEGER, seq INTEGER,
@ -45,7 +45,7 @@ async def init_internal_db(db):
FOREIGN KEY (database_name) REFERENCES databases(database_name), FOREIGN KEY (database_name) REFERENCES databases(database_name),
FOREIGN KEY (database_name, table_name) REFERENCES tables(database_name, table_name) FOREIGN KEY (database_name, table_name) REFERENCES tables(database_name, table_name)
); );
CREATE TABLE IF NOT EXISTS foreign_keys ( CREATE TABLE IF NOT EXISTS core_foreign_keys (
database_name TEXT, database_name TEXT,
table_name TEXT, table_name TEXT,
id INTEGER, id INTEGER,
@ -69,12 +69,16 @@ async def populate_schema_tables(internal_db, db):
database_name = db.name database_name = db.name
def delete_everything(conn): def delete_everything(conn):
conn.execute("DELETE FROM tables WHERE database_name = ?", [database_name]) conn.execute("DELETE FROM core_tables WHERE database_name = ?", [database_name])
conn.execute("DELETE FROM columns WHERE database_name = ?", [database_name])
conn.execute( conn.execute(
"DELETE FROM foreign_keys WHERE database_name = ?", [database_name] "DELETE FROM core_columns WHERE database_name = ?", [database_name]
)
conn.execute(
"DELETE FROM core_foreign_keys WHERE database_name = ?", [database_name]
)
conn.execute(
"DELETE FROM core_indexes WHERE database_name = ?", [database_name]
) )
conn.execute("DELETE FROM indexes WHERE database_name = ?", [database_name])
await internal_db.execute_write_fn(delete_everything) await internal_db.execute_write_fn(delete_everything)
@ -133,14 +137,14 @@ async def populate_schema_tables(internal_db, db):
await internal_db.execute_write_many( await internal_db.execute_write_many(
""" """
INSERT INTO tables (database_name, table_name, rootpage, sql) INSERT INTO core_tables (database_name, table_name, rootpage, sql)
values (?, ?, ?, ?) values (?, ?, ?, ?)
""", """,
tables_to_insert, tables_to_insert,
) )
await internal_db.execute_write_many( await internal_db.execute_write_many(
""" """
INSERT INTO columns ( INSERT INTO core_columns (
database_name, table_name, cid, name, type, "notnull", default_value, is_pk, hidden database_name, table_name, cid, name, type, "notnull", default_value, is_pk, hidden
) VALUES ( ) VALUES (
:database_name, :table_name, :cid, :name, :type, :notnull, :default_value, :is_pk, :hidden :database_name, :table_name, :cid, :name, :type, :notnull, :default_value, :is_pk, :hidden
@ -150,7 +154,7 @@ async def populate_schema_tables(internal_db, db):
) )
await internal_db.execute_write_many( await internal_db.execute_write_many(
""" """
INSERT INTO foreign_keys ( INSERT INTO core_foreign_keys (
database_name, table_name, "id", seq, "table", "from", "to", on_update, on_delete, match database_name, table_name, "id", seq, "table", "from", "to", on_update, on_delete, match
) VALUES ( ) VALUES (
:database_name, :table_name, :id, :seq, :table, :from, :to, :on_update, :on_delete, :match :database_name, :table_name, :id, :seq, :table, :from, :to, :on_update, :on_delete, :match
@ -160,7 +164,7 @@ async def populate_schema_tables(internal_db, db):
) )
await internal_db.execute_write_many( await internal_db.execute_write_many(
""" """
INSERT INTO indexes ( INSERT INTO core_indexes (
database_name, table_name, seq, name, "unique", origin, partial database_name, table_name, seq, name, "unique", origin, partial
) VALUES ( ) VALUES (
:database_name, :table_name, :seq, :name, :unique, :origin, :partial :database_name, :table_name, :seq, :name, :unique, :origin, :partial

Wyświetl plik

@ -950,9 +950,9 @@ class TableCreateView(BaseView):
async def _table_columns(datasette, database_name): async def _table_columns(datasette, database_name):
internal = datasette.get_database("_internal") internal_db = datasette.get_internal_database()
result = await internal.execute( result = await internal_db.execute(
"select table_name, name from columns where database_name = ?", "select table_name, name from core_columns where database_name = ?",
[database_name], [database_name],
) )
table_columns = {} table_columns = {}

Wyświetl plik

@ -238,7 +238,7 @@ class CreateTokenView(BaseView):
# Build list of databases and tables the user has permission to view # Build list of databases and tables the user has permission to view
database_with_tables = [] database_with_tables = []
for database in self.ds.databases.values(): for database in self.ds.databases.values():
if database.name in ("_internal", "_memory"): if database.name == "_memory":
continue continue
if not await self.ds.permission_allowed( if not await self.ds.permission_allowed(
request.actor, "view-database", database.name request.actor, "view-database", database.name

Wyświetl plik

@ -134,6 +134,8 @@ Once started you can access it at ``http://localhost:8001``
mode mode
--ssl-keyfile TEXT SSL key file --ssl-keyfile TEXT SSL key file
--ssl-certfile TEXT SSL certificate file --ssl-certfile TEXT SSL certificate file
--internal PATH Path to a persistent Datasette internal SQLite
database
--help Show this message and exit. --help Show this message and exit.

Wyświetl plik

@ -271,7 +271,7 @@ Property exposing a ``collections.OrderedDict`` of databases currently connected
The dictionary keys are the name of the database that is used in the URL - e.g. ``/fixtures`` would have a key of ``"fixtures"``. The values are :ref:`internals_database` instances. The dictionary keys are the name of the database that is used in the URL - e.g. ``/fixtures`` would have a key of ``"fixtures"``. The values are :ref:`internals_database` instances.
All databases are listed, irrespective of user permissions. This means that the ``_internal`` database will always be listed here. All databases are listed, irrespective of user permissions.
.. _datasette_permissions: .. _datasette_permissions:
@ -479,6 +479,13 @@ The following example creates a token that can access ``view-instance`` and ``vi
Returns the specified database object. Raises a ``KeyError`` if the database does not exist. Call this method without an argument to return the first connected database. Returns the specified database object. Raises a ``KeyError`` if the database does not exist. Call this method without an argument to return the first connected database.
.. _get_internal_database:
.get_internal_database()
------------------------
Returns a database object for reading and writing to the private :ref:`internal database <internals_internal>`.
.. _datasette_add_database: .. _datasette_add_database:
.add_database(db, name=None, route=None) .add_database(db, name=None, route=None)
@ -1127,19 +1134,21 @@ You can selectively disable CSRF protection using the :ref:`plugin_hook_skip_csr
.. _internals_internal: .. _internals_internal:
The _internal database Datasette's internal database
====================== =============================
.. warning:: Datasette maintains an "internal" SQLite database used for configuration, caching, and storage. Plugins can store configuration, settings, and other data inside this database. By default, Datasette will use a temporary in-memory SQLite database as the internal database, which is created at startup and destroyed at shutdown. Users of Datasette can optionally pass in a ``--internal`` flag to specify the path to a SQLite database to use as the internal database, which will persist internal data across Datasette instances.
This API should be considered unstable - the structure of these tables may change prior to the release of Datasette 1.0.
Datasette maintains an in-memory SQLite database with details of the databases, tables and columns for all of the attached databases. The internal database is not exposed in the Datasette application by default, which means private data can safely be stored without worry of accidentally leaking information through the default Datasette interface and API. However, other plugins do have full read and write access to the internal database.
By default all actors are denied access to the ``view-database`` permission for the ``_internal`` database, so the database is not visible to anyone unless they :ref:`sign in as root <authentication_root>`. Plugins can access this database by calling ``internal_db = datasette.get_internal_database()`` and then executing queries using the :ref:`Database API <internals_database>`.
Plugins can access this database by calling ``db = datasette.get_database("_internal")`` and then executing queries using the :ref:`Database API <internals_database>`. Plugin authors are asked to practice good etiquette when using the internal database, as all plugins use the same database to store data. For example:
You can explore an example of this database by `signing in as root <https://latest.datasette.io/login-as-root>`__ to the ``latest.datasette.io`` demo instance and then navigating to `latest.datasette.io/_internal <https://latest.datasette.io/_internal>`__. 1. Use a unique prefix when creating tables, indices, and triggers in the internal database. If your plugin is called ``datasette-xyz``, then prefix names with ``datasette_xyz_*``.
2. Avoid long-running write statements that may stall or block other plugins that are trying to write at the same time.
3. Use temporary tables or shared in-memory attached databases when possible.
4. Avoid implementing features that could expose private data stored in the internal database by other plugins.
.. _internals_utils: .. _internals_utils:

Wyświetl plik

@ -120,7 +120,7 @@ def permission_allowed(datasette, actor, action):
assert ( assert (
2 2
== ( == (
await datasette.get_database("_internal").execute("select 1 + 1") await datasette.get_internal_database().execute("select 1 + 1")
).first()[0] ).first()[0]
) )
if action == "this_is_allowed_async": if action == "this_is_allowed_async":
@ -142,7 +142,8 @@ def startup(datasette):
async def inner(): async def inner():
# Run against _internal so tests that use the ds_client fixture # Run against _internal so tests that use the ds_client fixture
# (which has no databases yet on startup) do not fail: # (which has no databases yet on startup) do not fail:
result = await datasette.get_database("_internal").execute("select 1 + 1") internal_db = datasette.get_internal_database()
result = await internal_db.execute("select 1 + 1")
datasette._startup_hook_calculation = result.first()[0] datasette._startup_hook_calculation = result.first()[0]
return inner return inner

Wyświetl plik

@ -154,6 +154,7 @@ def test_metadata_yaml():
ssl_keyfile=None, ssl_keyfile=None,
ssl_certfile=None, ssl_certfile=None,
return_instance=True, return_instance=True,
internal=None,
) )
client = _TestClient(ds) client = _TestClient(ds)
response = client.get("/-/metadata.json") response = client.get("/-/metadata.json")
@ -368,3 +369,14 @@ def test_help_settings():
result = runner.invoke(cli, ["--help-settings"]) result = runner.invoke(cli, ["--help-settings"])
for setting in SETTINGS: for setting in SETTINGS:
assert setting.name in result.output assert setting.name in result.output
def test_internal_db(tmpdir):
runner = CliRunner()
internal_path = tmpdir / "internal.db"
assert not internal_path.exists()
result = runner.invoke(
cli, ["--memory", "--internal", str(internal_path), "--get", "/"]
)
assert result.exit_code == 0
assert internal_path.exists()

Wyświetl plik

@ -1,55 +1,35 @@
import pytest import pytest
@pytest.mark.asyncio # ensure refresh_schemas() gets called before interacting with internal_db
async def test_internal_only_available_to_root(ds_client): async def ensure_internal(ds_client):
cookie = ds_client.actor_cookie({"id": "root"}) await ds_client.get("/fixtures.json?sql=select+1")
assert (await ds_client.get("/_internal")).status_code == 403 return ds_client.ds.get_internal_database()
assert (
await ds_client.get("/_internal", cookies={"ds_actor": cookie})
).status_code == 200
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_internal_databases(ds_client): async def test_internal_databases(ds_client):
cookie = ds_client.actor_cookie({"id": "root"}) internal_db = await ensure_internal(ds_client)
databases = ( databases = await internal_db.execute("select * from core_databases")
await ds_client.get( assert len(databases) == 1
"/_internal/databases.json?_shape=array", cookies={"ds_actor": cookie} assert databases.rows[0]["database_name"] == "fixtures"
)
).json()
assert len(databases) == 2
internal, fixtures = databases
assert internal["database_name"] == "_internal"
assert internal["is_memory"] == 1
assert internal["path"] is None
assert isinstance(internal["schema_version"], int)
assert fixtures["database_name"] == "fixtures"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_internal_tables(ds_client): async def test_internal_tables(ds_client):
cookie = ds_client.actor_cookie({"id": "root"}) internal_db = await ensure_internal(ds_client)
tables = ( tables = await internal_db.execute("select * from core_tables")
await ds_client.get(
"/_internal/tables.json?_shape=array", cookies={"ds_actor": cookie}
)
).json()
assert len(tables) > 5 assert len(tables) > 5
table = tables[0] table = tables.rows[0]
assert set(table.keys()) == {"rootpage", "table_name", "database_name", "sql"} assert set(table.keys()) == {"rootpage", "table_name", "database_name", "sql"}
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_internal_indexes(ds_client): async def test_internal_indexes(ds_client):
cookie = ds_client.actor_cookie({"id": "root"}) internal_db = await ensure_internal(ds_client)
indexes = ( indexes = await internal_db.execute("select * from core_indexes")
await ds_client.get(
"/_internal/indexes.json?_shape=array", cookies={"ds_actor": cookie}
)
).json()
assert len(indexes) > 5 assert len(indexes) > 5
index = indexes[0] index = indexes.rows[0]
assert set(index.keys()) == { assert set(index.keys()) == {
"partial", "partial",
"name", "name",
@ -63,14 +43,10 @@ async def test_internal_indexes(ds_client):
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_internal_foreign_keys(ds_client): async def test_internal_foreign_keys(ds_client):
cookie = ds_client.actor_cookie({"id": "root"}) internal_db = await ensure_internal(ds_client)
foreign_keys = ( foreign_keys = await internal_db.execute("select * from core_foreign_keys")
await ds_client.get(
"/_internal/foreign_keys.json?_shape=array", cookies={"ds_actor": cookie}
)
).json()
assert len(foreign_keys) > 5 assert len(foreign_keys) > 5
foreign_key = foreign_keys[0] foreign_key = foreign_keys.rows[0]
assert set(foreign_key.keys()) == { assert set(foreign_key.keys()) == {
"table", "table",
"seq", "seq",

Wyświetl plik

@ -329,7 +329,7 @@ def test_hook_extra_body_script(app_client, path, expected_extra_body_script):
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_hook_asgi_wrapper(ds_client): async def test_hook_asgi_wrapper(ds_client):
response = await ds_client.get("/fixtures") response = await ds_client.get("/fixtures")
assert "_internal, fixtures" == response.headers["x-databases"] assert "fixtures" == response.headers["x-databases"]
def test_hook_extra_template_vars(restore_working_directory): def test_hook_extra_template_vars(restore_working_directory):