diff --git a/datasette/app.py b/datasette/app.py index 1871aeb1..4deb8697 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -256,6 +256,7 @@ class Datasette: pdb=False, crossdb=False, nolock=False, + internal=None, ): self._startup_invoked = False assert config_dir is None or isinstance( @@ -304,17 +305,18 @@ class Datasette: self.add_database( Database(self, is_mutable=False, is_memory=True), name="_memory" ) - # memory_name is a random string so that each Datasette instance gets its own - # unique in-memory named database - otherwise unit tests can fail with weird - # errors when different instances accidentally share an in-memory database - self.add_database( - Database(self, memory_name=secrets.token_hex()), name="_internal" - ) - self.internal_db_created = False for file in self.files: self.add_database( Database(self, file, is_mutable=file not in self.immutables) ) + + self.internal_db_created = False + if internal is None: + self._internal_database = Database(self, memory_name=secrets.token_hex()) + else: + self._internal_database = Database(self, path=internal, mode="rwc") + self._internal_database.name = "__INTERNAL__" + self.cache_headers = cache_headers self.cors = cors config_files = [] @@ -436,15 +438,14 @@ class Datasette: await self._refresh_schemas() async def _refresh_schemas(self): - internal_db = self.databases["_internal"] + internal_db = self.get_internal_database() if not self.internal_db_created: await init_internal_db(internal_db) self.internal_db_created = True - current_schema_versions = { row["database_name"]: row["schema_version"] for row in await internal_db.execute( - "select database_name, schema_version from databases" + "select database_name, schema_version from core_databases" ) } for database_name, db in self.databases.items(): @@ -459,7 +460,7 @@ class Datasette: values = [database_name, db.is_memory, schema_version] await internal_db.execute_write( """ - INSERT OR REPLACE INTO databases (database_name, path, is_memory, 
schema_version) + INSERT OR REPLACE INTO core_databases (database_name, path, is_memory, schema_version) VALUES {} """.format( placeholders @@ -554,8 +555,7 @@ class Datasette: raise KeyError return matches[0] if name is None: - # Return first database that isn't "_internal" - name = [key for key in self.databases.keys() if key != "_internal"][0] + name = [key for key in self.databases.keys()][0] return self.databases[name] def add_database(self, db, name=None, route=None): @@ -655,6 +655,9 @@ class Datasette: def _metadata(self): return self.metadata() + def get_internal_database(self): + return self._internal_database + def plugin_config(self, plugin_name, database=None, table=None, fallback=True): """Return config for plugin, falling back from specified database/table""" plugins = self.metadata( @@ -978,7 +981,6 @@ class Datasette: "hash": d.hash, } for name, d in self.databases.items() - if name != "_internal" ] def _versions(self): diff --git a/datasette/cli.py b/datasette/cli.py index 6ebb1985..1a5a8af3 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -148,9 +148,6 @@ async def inspect_(files, sqlite_extensions): app = Datasette([], immutables=files, sqlite_extensions=sqlite_extensions) data = {} for name, database in app.databases.items(): - if name == "_internal": - # Don't include the in-memory _internal database - continue counts = await database.table_counts(limit=3600 * 1000) data[name] = { "hash": database.hash, @@ -476,6 +473,11 @@ def uninstall(packages, yes): "--ssl-certfile", help="SSL certificate file", ) +@click.option( + "--internal", + type=click.Path(), + help="Path to a persistent Datasette internal SQLite database", +) def serve( files, immutable, @@ -507,6 +509,7 @@ def serve( nolock, ssl_keyfile, ssl_certfile, + internal, return_instance=False, ): """Serve up specified SQLite database files with a web UI""" @@ -570,6 +573,7 @@ def serve( pdb=pdb, crossdb=crossdb, nolock=nolock, + internal=internal, ) # if files is a single 
directory, use that as config_dir= diff --git a/datasette/database.py b/datasette/database.py index af39ac9e..cb01301e 100644 --- a/datasette/database.py +++ b/datasette/database.py @@ -29,7 +29,13 @@ AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file")) class Database: def __init__( - self, ds, path=None, is_mutable=True, is_memory=False, memory_name=None + self, + ds, + path=None, + is_mutable=True, + is_memory=False, + memory_name=None, + mode=None, ): self.name = None self.route = None @@ -50,6 +56,7 @@ class Database: self._write_connection = None # This is used to track all file connections so they can be closed self._all_file_connections = [] + self.mode = mode @property def cached_table_counts(self): @@ -90,6 +97,7 @@ class Database: return conn if self.is_memory: return sqlite3.connect(":memory:", uri=True) + # mode=ro or immutable=1? if self.is_mutable: qs = "?mode=ro" @@ -100,6 +108,8 @@ class Database: assert not (write and not self.is_mutable) if write: qs = "" + if self.mode is not None: + qs = f"?mode={self.mode}" conn = sqlite3.connect( f"file:{self.path}{qs}", uri=True, check_same_thread=False ) diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py index 63a66c3c..f0b086e9 100644 --- a/datasette/default_permissions.py +++ b/datasette/default_permissions.py @@ -146,8 +146,6 @@ async def _resolve_metadata_view_permissions(datasette, actor, action, resource) if allow is not None: return actor_matches_allow(actor, allow) elif action == "view-database": - if resource == "_internal" and (actor is None or actor.get("id") != "root"): - return False database_allow = datasette.metadata("allow", database=resource) if database_allow is None: return None diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index e4b49e80..215695ca 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -5,13 +5,13 @@ from datasette.utils import table_column_details async def 
init_internal_db(db): create_tables_sql = textwrap.dedent( """ - CREATE TABLE IF NOT EXISTS databases ( + CREATE TABLE IF NOT EXISTS core_databases ( database_name TEXT PRIMARY KEY, path TEXT, is_memory INTEGER, schema_version INTEGER ); - CREATE TABLE IF NOT EXISTS tables ( + CREATE TABLE IF NOT EXISTS core_tables ( database_name TEXT, table_name TEXT, rootpage INTEGER, @@ -19,7 +19,7 @@ async def init_internal_db(db): PRIMARY KEY (database_name, table_name), FOREIGN KEY (database_name) REFERENCES databases(database_name) ); - CREATE TABLE IF NOT EXISTS columns ( + CREATE TABLE IF NOT EXISTS core_columns ( database_name TEXT, table_name TEXT, cid INTEGER, @@ -33,7 +33,7 @@ async def init_internal_db(db): FOREIGN KEY (database_name) REFERENCES databases(database_name), FOREIGN KEY (database_name, table_name) REFERENCES tables(database_name, table_name) ); - CREATE TABLE IF NOT EXISTS indexes ( + CREATE TABLE IF NOT EXISTS core_indexes ( database_name TEXT, table_name TEXT, seq INTEGER, @@ -45,7 +45,7 @@ async def init_internal_db(db): FOREIGN KEY (database_name) REFERENCES databases(database_name), FOREIGN KEY (database_name, table_name) REFERENCES tables(database_name, table_name) ); - CREATE TABLE IF NOT EXISTS foreign_keys ( + CREATE TABLE IF NOT EXISTS core_foreign_keys ( database_name TEXT, table_name TEXT, id INTEGER, @@ -69,12 +69,16 @@ async def populate_schema_tables(internal_db, db): database_name = db.name def delete_everything(conn): - conn.execute("DELETE FROM tables WHERE database_name = ?", [database_name]) - conn.execute("DELETE FROM columns WHERE database_name = ?", [database_name]) + conn.execute("DELETE FROM core_tables WHERE database_name = ?", [database_name]) conn.execute( - "DELETE FROM foreign_keys WHERE database_name = ?", [database_name] + "DELETE FROM core_columns WHERE database_name = ?", [database_name] + ) + conn.execute( + "DELETE FROM core_foreign_keys WHERE database_name = ?", [database_name] + ) + conn.execute( + "DELETE FROM 
core_indexes WHERE database_name = ?", [database_name] ) - conn.execute("DELETE FROM indexes WHERE database_name = ?", [database_name]) await internal_db.execute_write_fn(delete_everything) @@ -133,14 +137,14 @@ async def populate_schema_tables(internal_db, db): await internal_db.execute_write_many( """ - INSERT INTO tables (database_name, table_name, rootpage, sql) + INSERT INTO core_tables (database_name, table_name, rootpage, sql) values (?, ?, ?, ?) """, tables_to_insert, ) await internal_db.execute_write_many( """ - INSERT INTO columns ( + INSERT INTO core_columns ( database_name, table_name, cid, name, type, "notnull", default_value, is_pk, hidden ) VALUES ( :database_name, :table_name, :cid, :name, :type, :notnull, :default_value, :is_pk, :hidden @@ -150,7 +154,7 @@ async def populate_schema_tables(internal_db, db): ) await internal_db.execute_write_many( """ - INSERT INTO foreign_keys ( + INSERT INTO core_foreign_keys ( database_name, table_name, "id", seq, "table", "from", "to", on_update, on_delete, match ) VALUES ( :database_name, :table_name, :id, :seq, :table, :from, :to, :on_update, :on_delete, :match @@ -160,7 +164,7 @@ async def populate_schema_tables(internal_db, db): ) await internal_db.execute_write_many( """ - INSERT INTO indexes ( + INSERT INTO core_indexes ( database_name, table_name, seq, name, "unique", origin, partial ) VALUES ( :database_name, :table_name, :seq, :name, :unique, :origin, :partial diff --git a/datasette/views/database.py b/datasette/views/database.py index d9abc38a..4647bedc 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -950,9 +950,9 @@ class TableCreateView(BaseView): async def _table_columns(datasette, database_name): - internal = datasette.get_database("_internal") - result = await internal.execute( - "select table_name, name from columns where database_name = ?", + internal_db = datasette.get_internal_database() + result = await internal_db.execute( + "select table_name, name from 
core_columns where database_name = ?", [database_name], ) table_columns = {} diff --git a/datasette/views/special.py b/datasette/views/special.py index c45a3eca..c1b84f8f 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -238,7 +238,7 @@ class CreateTokenView(BaseView): # Build list of databases and tables the user has permission to view database_with_tables = [] for database in self.ds.databases.values(): - if database.name in ("_internal", "_memory"): + if database.name == "_memory": continue if not await self.ds.permission_allowed( request.actor, "view-database", database.name diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 5657f480..8e333447 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -134,6 +134,8 @@ Once started you can access it at ``http://localhost:8001`` mode --ssl-keyfile TEXT SSL key file --ssl-certfile TEXT SSL certificate file + --internal PATH Path to a persistent Datasette internal SQLite + database --help Show this message and exit. diff --git a/docs/internals.rst b/docs/internals.rst index 4b82e11c..fe9a2fa7 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -271,7 +271,7 @@ Property exposing a ``collections.OrderedDict`` of databases currently connected The dictionary keys are the name of the database that is used in the URL - e.g. ``/fixtures`` would have a key of ``"fixtures"``. The values are :ref:`internals_database` instances. -All databases are listed, irrespective of user permissions. This means that the ``_internal`` database will always be listed here. +All databases are listed, irrespective of user permissions. .. _datasette_permissions: @@ -479,6 +479,13 @@ The following example creates a token that can access ``view-instance`` and ``vi Returns the specified database object. Raises a ``KeyError`` if the database does not exist. Call this method without an argument to return the first connected database. +.. 
_get_internal_database: + +.get_internal_database() +------------------------ + +Returns a database object for reading and writing to the private :ref:`internal database `. + .. _datasette_add_database: .add_database(db, name=None, route=None) @@ -1127,19 +1134,21 @@ You can selectively disable CSRF protection using the :ref:`plugin_hook_skip_csr .. _internals_internal: -The _internal database -====================== +Datasette's internal database +============================= -.. warning:: - This API should be considered unstable - the structure of these tables may change prior to the release of Datasette 1.0. +Datasette maintains an "internal" SQLite database used for configuration, caching, and storage. Plugins can store configuration, settings, and other data inside this database. By default, Datasette will use a temporary in-memory SQLite database as the internal database, which is created at startup and destroyed at shutdown. Users of Datasette can optionally pass in a `--internal` flag to specify the path to a SQLite database to use as the internal database, which will persist internal data across Datasette instances. -Datasette maintains an in-memory SQLite database with details of the the databases, tables and columns for all of the attached databases. +The internal database is not exposed in the Datasette application by default, which means private data can safely be stored without worry of accidentally leaking information through the default Datasette interface and API. However, other plugins do have full read and write access to the internal database. -By default all actors are denied access to the ``view-database`` permission for the ``_internal`` database, so the database is not visible to anyone unless they :ref:`sign in as root `. +Plugins can access this database by calling ``internal_db = datasette.get_internal_database()`` and then executing queries using the :ref:`Database API `. 
-Plugins can access this database by calling ``db = datasette.get_database("_internal")`` and then executing queries using the :ref:`Database API `. +Plugin authors are asked to practice good etiquette when using the internal database, as all plugins use the same database to store data. For example: -You can explore an example of this database by `signing in as root `__ to the ``latest.datasette.io`` demo instance and then navigating to `latest.datasette.io/_internal `__. +1. Use a unique prefix when creating tables, indices, and triggers in the internal database. If your plugin is called ``datasette-xyz``, then prefix names with ``datasette_xyz_*``. +2. Avoid long-running write statements that may stall or block other plugins that are trying to write at the same time. +3. Use temporary tables or shared in-memory attached databases when possible. +4. Avoid implementing features that could expose private data stored in the internal database by other plugins. .. _internals_utils: diff --git a/tests/plugins/my_plugin_2.py b/tests/plugins/my_plugin_2.py index d588342c..bb82b8c1 100644 --- a/tests/plugins/my_plugin_2.py +++ b/tests/plugins/my_plugin_2.py @@ -120,7 +120,7 @@ def permission_allowed(datasette, actor, action): assert ( 2 == ( - await datasette.get_database("_internal").execute("select 1 + 1") + await datasette.get_internal_database().execute("select 1 + 1") ).first()[0] ) if action == "this_is_allowed_async": @@ -142,7 +142,8 @@ def startup(datasette): async def inner(): # Run against _internal so tests that use the ds_client fixture # (which has no databases yet on startup) do not fail: - result = await datasette.get_database("_internal").execute("select 1 + 1") + internal_db = datasette.get_internal_database() + result = await internal_db.execute("select 1 + 1") datasette._startup_hook_calculation = result.first()[0] return inner diff --git a/tests/test_cli.py b/tests/test_cli.py index d9a10f22..e85bcef1 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py
@@ -154,6 +154,7 @@ def test_metadata_yaml(): ssl_keyfile=None, ssl_certfile=None, return_instance=True, + internal=None, ) client = _TestClient(ds) response = client.get("/-/metadata.json") @@ -368,3 +369,14 @@ def test_help_settings(): result = runner.invoke(cli, ["--help-settings"]) for setting in SETTINGS: assert setting.name in result.output + + +def test_internal_db(tmpdir): + runner = CliRunner() + internal_path = tmpdir / "internal.db" + assert not internal_path.exists() + result = runner.invoke( + cli, ["--memory", "--internal", str(internal_path), "--get", "/"] + ) + assert result.exit_code == 0 + assert internal_path.exists() diff --git a/tests/test_internal_db.py b/tests/test_internal_db.py index a666dd72..5276dc99 100644 --- a/tests/test_internal_db.py +++ b/tests/test_internal_db.py @@ -1,55 +1,35 @@ import pytest -@pytest.mark.asyncio -async def test_internal_only_available_to_root(ds_client): - cookie = ds_client.actor_cookie({"id": "root"}) - assert (await ds_client.get("/_internal")).status_code == 403 - assert ( - await ds_client.get("/_internal", cookies={"ds_actor": cookie}) - ).status_code == 200 +# ensure refresh_schemas() gets called before interacting with internal_db +async def ensure_internal(ds_client): + await ds_client.get("/fixtures.json?sql=select+1") + return ds_client.ds.get_internal_database() @pytest.mark.asyncio async def test_internal_databases(ds_client): - cookie = ds_client.actor_cookie({"id": "root"}) - databases = ( - await ds_client.get( - "/_internal/databases.json?_shape=array", cookies={"ds_actor": cookie} - ) - ).json() - assert len(databases) == 2 - internal, fixtures = databases - assert internal["database_name"] == "_internal" - assert internal["is_memory"] == 1 - assert internal["path"] is None - assert isinstance(internal["schema_version"], int) - assert fixtures["database_name"] == "fixtures" + internal_db = await ensure_internal(ds_client) + databases = await internal_db.execute("select * from core_databases") 
+ assert len(databases) == 1 + assert databases.rows[0]["database_name"] == "fixtures" @pytest.mark.asyncio async def test_internal_tables(ds_client): - cookie = ds_client.actor_cookie({"id": "root"}) - tables = ( - await ds_client.get( - "/_internal/tables.json?_shape=array", cookies={"ds_actor": cookie} - ) - ).json() + internal_db = await ensure_internal(ds_client) + tables = await internal_db.execute("select * from core_tables") assert len(tables) > 5 - table = tables[0] + table = tables.rows[0] assert set(table.keys()) == {"rootpage", "table_name", "database_name", "sql"} @pytest.mark.asyncio async def test_internal_indexes(ds_client): - cookie = ds_client.actor_cookie({"id": "root"}) - indexes = ( - await ds_client.get( - "/_internal/indexes.json?_shape=array", cookies={"ds_actor": cookie} - ) - ).json() + internal_db = await ensure_internal(ds_client) + indexes = await internal_db.execute("select * from core_indexes") assert len(indexes) > 5 - index = indexes[0] + index = indexes.rows[0] assert set(index.keys()) == { "partial", "name", @@ -63,14 +43,10 @@ async def test_internal_indexes(ds_client): @pytest.mark.asyncio async def test_internal_foreign_keys(ds_client): - cookie = ds_client.actor_cookie({"id": "root"}) - foreign_keys = ( - await ds_client.get( - "/_internal/foreign_keys.json?_shape=array", cookies={"ds_actor": cookie} - ) - ).json() + internal_db = await ensure_internal(ds_client) + foreign_keys = await internal_db.execute("select * from core_foreign_keys") assert len(foreign_keys) > 5 - foreign_key = foreign_keys[0] + foreign_key = foreign_keys.rows[0] assert set(foreign_key.keys()) == { "table", "seq", diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 28fe720f..9761fa53 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -329,7 +329,7 @@ def test_hook_extra_body_script(app_client, path, expected_extra_body_script): @pytest.mark.asyncio async def test_hook_asgi_wrapper(ds_client): response = await 
ds_client.get("/fixtures") - assert "_internal, fixtures" == response.headers["x-databases"] + assert "fixtures" == response.headers["x-databases"] def test_hook_extra_template_vars(restore_working_directory):