diff --git a/.github/workflows/deploy-latest.yml b/.github/workflows/deploy-latest.yml
index 2de0a8b6..43e46fb4 100644
--- a/.github/workflows/deploy-latest.yml
+++ b/.github/workflows/deploy-latest.yml
@@ -31,7 +31,7 @@ jobs:
- name: Run tests
run: pytest
- name: Build fixtures.db
- run: python tests/fixtures.py fixtures.db fixtures.json plugins
+ run: python tests/fixtures.py fixtures.db fixtures.json plugins --extra-db-filename extra_database.db
- name: Build docs.db
run: |-
cd docs
@@ -48,12 +48,12 @@ jobs:
run: |-
gcloud config set run/region us-central1
gcloud config set project datasette-222320
- datasette publish cloudrun fixtures.db \
+ datasette publish cloudrun fixtures.db extra_database.db \
-m fixtures.json \
--plugins-dir=plugins \
--branch=$GITHUB_SHA \
--version-note=$GITHUB_SHA \
- --extra-options="--setting template_debug 1" \
+ --extra-options="--setting template_debug 1 --crossdb" \
--install=pysqlite3-binary \
--service=datasette-latest
# Deploy docs.db to a different service
diff --git a/datasette/app.py b/datasette/app.py
index 9e15a162..e3272c6e 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -85,6 +85,9 @@ from .version import __version__
app_root = Path(__file__).parent.parent
+# https://github.com/simonw/datasette/issues/283#issuecomment-781591015
+SQLITE_LIMIT_ATTACHED = 10
+
Setting = collections.namedtuple("Setting", ("name", "default", "help"))
SETTINGS = (
Setting("default_page_size", 100, "Default page size for the table view"),
@@ -194,6 +197,7 @@ class Datasette:
version_note=None,
config_dir=None,
pdb=False,
+ crossdb=False,
):
assert config_dir is None or isinstance(
config_dir, Path
@@ -217,7 +221,8 @@ class Datasette:
self.inspect_data = inspect_data
self.immutables = set(immutables or [])
self.databases = collections.OrderedDict()
- if memory or not self.files:
+ self.crossdb = crossdb
+ if memory or crossdb or not self.files:
self.add_database(Database(self, is_memory=True), name="_memory")
# memory_name is a random string so that each Datasette instance gets its own
# unique in-memory named database - otherwise unit tests can fail with weird
@@ -499,6 +504,19 @@ class Datasette:
conn.execute(f"PRAGMA cache_size=-{self.setting('cache_size_kb')}")
# pylint: disable=no-member
pm.hook.prepare_connection(conn=conn, database=database, datasette=self)
+        # If self.crossdb and this is _memory, attach the first SQLITE_LIMIT_ATTACHED databases
+ if self.crossdb and database == "_memory":
+ count = 0
+ for db_name, db in self.databases.items():
+ if count >= SQLITE_LIMIT_ATTACHED or db.is_memory:
+ continue
+ sql = 'ATTACH DATABASE "file:{path}?{qs}" AS [{name}];'.format(
+ path=db.path,
+ qs="mode=ro" if db.is_mutable else "immutable=1",
+ name=db_name,
+ )
+ conn.execute(sql)
+ count += 1
def add_message(self, request, message, type=INFO):
if not hasattr(request, "_messages"):
diff --git a/datasette/cli.py b/datasette/cli.py
index 815f9718..96a41740 100644
--- a/datasette/cli.py
+++ b/datasette/cli.py
@@ -12,7 +12,7 @@ from subprocess import call
import sys
from runpy import run_module
import webbrowser
-from .app import Datasette, DEFAULT_SETTINGS, SETTINGS, pm
+from .app import Datasette, DEFAULT_SETTINGS, SETTINGS, SQLITE_LIMIT_ATTACHED, pm
from .utils import (
StartupError,
check_connection,
@@ -410,6 +410,11 @@ def uninstall(packages, yes):
is_flag=True,
help="Create database files if they do not exist",
)
+@click.option(
+ "--crossdb",
+ is_flag=True,
+ help="Enable cross-database joins using the /_memory database",
+)
@click.option(
"--ssl-keyfile",
help="SSL key file",
@@ -442,6 +447,7 @@ def serve(
pdb,
open_browser,
create,
+ crossdb,
ssl_keyfile,
ssl_certfile,
return_instance=False,
@@ -499,6 +505,7 @@ def serve(
secret=secret,
version_note=version_note,
pdb=pdb,
+ crossdb=crossdb,
)
# if files is a single directory, use that as config_dir=
@@ -591,3 +598,15 @@ async def check_databases(ds):
raise click.UsageError(
f"Connection to {database.path} failed check: {str(e.args[0])}"
)
+ # If --crossdb and more than SQLITE_LIMIT_ATTACHED show warning
+ if (
+ ds.crossdb
+ and len([db for db in ds.databases.values() if not db.is_memory])
+ > SQLITE_LIMIT_ATTACHED
+ ):
+ msg = (
+ "Warning: --crossdb only works with the first {} attached databases".format(
+ SQLITE_LIMIT_ATTACHED
+ )
+ )
+ click.echo(click.style(msg, bold=True, fg="yellow"), err=True)
diff --git a/datasette/database.py b/datasette/database.py
index cda36e6e..3579cce9 100644
--- a/datasette/database.py
+++ b/datasette/database.py
@@ -1,4 +1,5 @@
import asyncio
+from collections import namedtuple
from pathlib import Path
import janus
import queue
@@ -22,6 +23,8 @@ from .inspect import inspect_hash
connections = threading.local()
+AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file"))
+
class Database:
def __init__(
@@ -78,7 +81,7 @@ class Database:
conn.execute("PRAGMA query_only=1")
return conn
if self.is_memory:
- return sqlite3.connect(":memory:")
+ return sqlite3.connect(":memory:", uri=True)
# mode=ro or immutable=1?
if self.is_mutable:
qs = "?mode=ro"
@@ -243,6 +246,12 @@ class Database:
return None
return Path(self.path).stat().st_mtime_ns
+    async def attached_databases(self):
+        """Return AttachedDatabase tuples for every database_list row with seq > 0."""
+        # Plain PRAGMA: pragma_database_list() as a table needs SQLite 3.16.0+
+        results = await self.execute("PRAGMA database_list;")
+        return [AttachedDatabase(*row) for row in results.rows if row[0] > 0]
+
async def table_exists(self, table):
results = await self.execute(
"select 1 from sqlite_master where type='table' and name=?", params=(table,)
diff --git a/datasette/templates/database.html b/datasette/templates/database.html
index 7065f2c2..3fe7c891 100644
--- a/datasette/templates/database.html
+++ b/datasette/templates/database.html
@@ -56,6 +56,17 @@
{% endif %}
+{% if attached_databases %}
+
+
The following databases are attached to this connection, and can be used for cross-database joins:
+
+ {% for db_name in attached_databases %}
+ - {{ db_name }} - tables
+ {% endfor %}
+
+
+{% endif %}
+
{% for table in tables %}
{% if show_hidden or not table.hidden %}
diff --git a/datasette/views/database.py b/datasette/views/database.py
index 75eb8f02..0c58a351 100644
--- a/datasette/views/database.py
+++ b/datasette/views/database.py
@@ -115,6 +115,8 @@ class DatabaseView(DataView):
links.extend(extra_links)
return links
+ attached_databases = [d.name for d in await db.attached_databases()]
+
return (
{
"database": database,
@@ -139,6 +141,7 @@ class DatabaseView(DataView):
"allow_download": self.ds.setting("allow_download")
and not db.is_mutable
and not db.is_memory,
+ "attached_databases": attached_databases,
},
(f"database-{to_css_class(database)}.html", "database.html"),
)
diff --git a/docs/datasette-serve-help.txt b/docs/datasette-serve-help.txt
index 243637cb..f0dab3ea 100644
--- a/docs/datasette-serve-help.txt
+++ b/docs/datasette-serve-help.txt
@@ -41,6 +41,7 @@ Options:
--pdb Launch debugger on any errors
-o, --open Open Datasette in your web browser
--create Create database files if they do not exist
+ --crossdb Enable cross-database joins using the /_memory database
--ssl-keyfile TEXT SSL key file
--ssl-certfile TEXT SSL certificate file
--help Show this message and exit.
diff --git a/docs/internals.rst b/docs/internals.rst
index 4a2c0a8e..a46fe0f5 100644
--- a/docs/internals.rst
+++ b/docs/internals.rst
@@ -677,6 +677,9 @@ The ``Database`` class also provides properties and methods for introspecting th
``db.is_memory`` - boolean
Is this database an in-memory database?
+``await db.attached_databases()`` - list of named tuples
+ Returns a list of additional databases that have been connected to this database using the SQLite ATTACH command. Each named tuple has fields ``seq``, ``name`` and ``file``.
+
``await db.table_exists(table)`` - boolean
Check if a table called ``table`` exists.
diff --git a/docs/sql_queries.rst b/docs/sql_queries.rst
index 93f17eaf..bb263b18 100644
--- a/docs/sql_queries.rst
+++ b/docs/sql_queries.rst
@@ -389,3 +389,34 @@ detect if there should be another page.
Since the where clause acts against the index on the primary key, the query is
extremely fast even for records that are a long way into the overall pagination
set.
+
+.. _cross_database_queries:
+
+Cross-database queries
+----------------------
+
+SQLite has the ability to run queries that join across multiple databases. Up to ten databases can be attached to a single SQLite connection and queried together.
+
+Datasette can execute joins across multiple databases if it is started with the ``--crossdb`` option::
+
+ datasette fixtures.db extra_database.db --crossdb
+
+If it is started in this way, the ``/_memory`` page can be used to execute queries that join across multiple databases.
+
+References to tables in attached databases should be preceded by the database name and a period.
+
+For example, this query will show a list of tables across both of the above databases:
+
+.. code-block:: sql
+
+ select
+ 'fixtures' as database, *
+ from
+ [fixtures].sqlite_master
+ union
+ select
+ 'extra_database' as database, *
+ from
+ [extra_database].sqlite_master
+
+`Try that out here `__.
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 1ec6a2ba..30113ff2 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -105,6 +105,7 @@ def make_app_client(
static_mounts=None,
template_dir=None,
metadata=None,
+ crossdb=False,
):
with tempfile.TemporaryDirectory() as tmpdir:
filepath = os.path.join(tmpdir, filename)
@@ -149,6 +150,7 @@ def make_app_client(
inspect_data=inspect_data,
static_mounts=static_mounts,
template_dir=template_dir,
+ crossdb=crossdb,
)
ds.sqlite_functions.append(("sleep", 1, lambda n: time.sleep(float(n))))
yield TestClient(ds)
@@ -180,6 +182,15 @@ def app_client_two_attached_databases():
yield client
+@pytest.fixture(scope="session")
+def app_client_two_attached_databases_crossdb_enabled():
+    """Yield a test client with an "extra database.db" file and crossdb=True."""
+    extra = {"extra database.db": EXTRA_DATABASE_SQL}
+    # Session scope: one client is shared by every test requesting this fixture
+    with make_app_client(extra_databases=extra, crossdb=True) as client:
+        yield client
+
+
@pytest.fixture(scope="session")
def app_client_conflicting_database_names():
with make_app_client(
@@ -750,7 +761,12 @@ def assert_permissions_checked(datasette, actions):
default=False,
help="Delete and recreate database if it exists",
)
-def cli(db_filename, metadata, plugins_path, recreate):
+@click.option(
+ "--extra-db-filename",
+ type=click.Path(file_okay=True, dir_okay=False),
+ help="Write out second test DB to this file",
+)
+def cli(db_filename, metadata, plugins_path, recreate, extra_db_filename):
"""Write out the fixtures database used by Datasette's test suite"""
if metadata and not metadata.endswith(".json"):
raise click.ClickException("Metadata should end with .json")
@@ -784,6 +800,17 @@ def cli(db_filename, metadata, plugins_path, recreate):
newpath = path / filepath.name
newpath.write_text(filepath.open().read())
print(f" Wrote plugin: {newpath}")
+ if extra_db_filename:
+ if pathlib.Path(extra_db_filename).exists():
+ if not recreate:
+ raise click.ClickException(
+ f"{extra_db_filename} already exists, use --recreate to reset it"
+ )
+ else:
+ pathlib.Path(extra_db_filename).unlink()
+ conn = sqlite3.connect(extra_db_filename)
+ conn.executescript(EXTRA_DATABASE_SQL)
+ print(f"Test tables written to {extra_db_filename}")
if __name__ == "__main__":
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 4bcf615b..8ddd32f6 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -147,6 +147,7 @@ def test_metadata_yaml():
get=None,
help_config=False,
pdb=False,
+ crossdb=False,
open_browser=False,
create=False,
ssl_keyfile=None,
diff --git a/tests/test_crossdb.py b/tests/test_crossdb.py
new file mode 100644
index 00000000..01c51130
--- /dev/null
+++ b/tests/test_crossdb.py
@@ -0,0 +1,75 @@
+from datasette.cli import cli
+from click.testing import CliRunner
+import urllib.parse
+import sqlite3
+from .fixtures import app_client_two_attached_databases_crossdb_enabled
+
+
+def test_crossdb_join(app_client_two_attached_databases_crossdb_enabled):
+    """A union across both attached databases can be queried through /_memory."""
+    sql = """
+    select
+      'extra database' as db,
+      pk,
+      text1,
+      text2
+    from
+      [extra database].searchable
+    union all
+    select
+      'fixtures' as db,
+      pk,
+      text1,
+      text2
+    from
+      fixtures.searchable
+    """
+    query_string = urllib.parse.urlencode({"sql": sql, "_shape": "array"})
+    client = app_client_two_attached_databases_crossdb_enabled
+    response = client.get("/_memory.json?" + query_string)
+    assert response.status == 200
+    assert response.json == [
+        {"db": "extra database", "pk": 1, "text1": "barry cat", "text2": "terry dog"},
+        {"db": "extra database", "pk": 2, "text1": "terry dog", "text2": "sara weasel"},
+        {"db": "fixtures", "pk": 1, "text1": "barry cat", "text2": "terry dog"},
+        {"db": "fixtures", "pk": 2, "text1": "terry dog", "text2": "sara weasel"},
+    ]
+
+
+def test_crossdb_warning_if_too_many_databases(tmp_path_factory):
+    """Serving eleven database files with --crossdb prints the limit warning."""
+    db_dir = tmp_path_factory.mktemp("dbs")
+    dbs = []
+    # 11 files: one more than the ten named in the warning asserted below
+    for i in range(11):
+        path = str(db_dir / "db_{}.db".format(i))
+        conn = sqlite3.connect(path)
+        # vacuum presumably forces the file to be written; always close the handle
+        try:
+            conn.execute("vacuum")
+        finally:
+            conn.close()
+        dbs.append(path)
+    runner = CliRunner(mix_stderr=False)
+    result = runner.invoke(
+        cli,
+        ["serve", "--crossdb", "--get", "/"] + dbs,
+        catch_exceptions=False,
+    )
+    assert (
+        "Warning: --crossdb only works with the first 10 attached databases"
+        in result.stderr
+    )
+
+
+def test_crossdb_attached_database_list_display(
+    app_client_two_attached_databases_crossdb_enabled,
+):
+    """The /_memory page mentions each attached database by name."""
+    page = app_client_two_attached_databases_crossdb_enabled.get("/_memory").text
+    for expected in (
+        "databases are attached to this connection",
+        "fixtures - ",
+        "extra database - ",
+    ):
+        assert expected in page
diff --git a/tests/test_internals_database.py b/tests/test_internals_database.py
index 7eff9f7e..086f1a48 100644
--- a/tests/test_internals_database.py
+++ b/tests/test_internals_database.py
@@ -4,7 +4,7 @@ Tests for the datasette.database.Database class
from datasette.database import Database, Results, MultipleValues
from datasette.utils.sqlite import sqlite3, supports_generated_columns
from datasette.utils import Column
-from .fixtures import app_client
+from .fixtures import app_client, app_client_two_attached_databases_crossdb_enabled
import pytest
import time
import uuid
@@ -466,6 +466,15 @@ def test_is_mutable(app_client):
assert Database(app_client.ds, is_memory=True, is_mutable=False).is_mutable is False
+@pytest.mark.asyncio
+async def test_attached_databases(app_client_two_attached_databases_crossdb_enabled):
+    """With crossdb on, _memory reports both other databases as attached."""
+    ds = app_client_two_attached_databases_crossdb_enabled.ds
+    attached = await ds.get_database("_memory").attached_databases()
+    names = {entry.name for entry in attached}
+    assert names == {"extra database", "fixtures"}
+
+
@pytest.mark.asyncio
async def test_database_memory_name(app_client):
ds = app_client.ds