kopia lustrzana https://github.com/simonw/datasette
--crossdb option for joining across databases (#1232)
* Test for cross-database join, refs #283
* Warn if --crossdb used with more than 10 DBs, refs #283
* latest.datasette.io demo of --crossdb joins, refs #283
* Show attached databases on /_memory page, refs #283
* Documentation for cross-database queries, refs #283
pull/1243/head
rodzic
4df548e766
commit
6f41c8a2be
|
@ -31,7 +31,7 @@ jobs:
|
|||
- name: Run tests
|
||||
run: pytest
|
||||
- name: Build fixtures.db
|
||||
run: python tests/fixtures.py fixtures.db fixtures.json plugins
|
||||
run: python tests/fixtures.py fixtures.db fixtures.json plugins --extra-db-filename extra_database.db
|
||||
- name: Build docs.db
|
||||
run: |-
|
||||
cd docs
|
||||
|
@ -48,12 +48,12 @@ jobs:
|
|||
run: |-
|
||||
gcloud config set run/region us-central1
|
||||
gcloud config set project datasette-222320
|
||||
datasette publish cloudrun fixtures.db \
|
||||
datasette publish cloudrun fixtures.db extra_database.db \
|
||||
-m fixtures.json \
|
||||
--plugins-dir=plugins \
|
||||
--branch=$GITHUB_SHA \
|
||||
--version-note=$GITHUB_SHA \
|
||||
--extra-options="--setting template_debug 1" \
|
||||
--extra-options="--setting template_debug 1 --crossdb" \
|
||||
--install=pysqlite3-binary \
|
||||
--service=datasette-latest
|
||||
# Deploy docs.db to a different service
|
||||
|
|
|
@ -85,6 +85,9 @@ from .version import __version__
|
|||
|
||||
app_root = Path(__file__).parent.parent
|
||||
|
||||
# https://github.com/simonw/datasette/issues/283#issuecomment-781591015
|
||||
SQLITE_LIMIT_ATTACHED = 10
|
||||
|
||||
Setting = collections.namedtuple("Setting", ("name", "default", "help"))
|
||||
SETTINGS = (
|
||||
Setting("default_page_size", 100, "Default page size for the table view"),
|
||||
|
@ -194,6 +197,7 @@ class Datasette:
|
|||
version_note=None,
|
||||
config_dir=None,
|
||||
pdb=False,
|
||||
crossdb=False,
|
||||
):
|
||||
assert config_dir is None or isinstance(
|
||||
config_dir, Path
|
||||
|
@ -217,7 +221,8 @@ class Datasette:
|
|||
self.inspect_data = inspect_data
|
||||
self.immutables = set(immutables or [])
|
||||
self.databases = collections.OrderedDict()
|
||||
if memory or not self.files:
|
||||
self.crossdb = crossdb
|
||||
if memory or crossdb or not self.files:
|
||||
self.add_database(Database(self, is_memory=True), name="_memory")
|
||||
# memory_name is a random string so that each Datasette instance gets its own
|
||||
# unique in-memory named database - otherwise unit tests can fail with weird
|
||||
|
@ -499,6 +504,19 @@ class Datasette:
|
|||
conn.execute(f"PRAGMA cache_size=-{self.setting('cache_size_kb')}")
|
||||
# pylint: disable=no-member
|
||||
pm.hook.prepare_connection(conn=conn, database=database, datasette=self)
|
||||
# If self.crossdb and this is _memory, connect the first SQLITE_LIMIT_ATTACHED databases
|
||||
if self.crossdb and database == "_memory":
|
||||
count = 0
|
||||
for db_name, db in self.databases.items():
|
||||
if count >= SQLITE_LIMIT_ATTACHED or db.is_memory:
|
||||
continue
|
||||
sql = 'ATTACH DATABASE "file:{path}?{qs}" AS [{name}];'.format(
|
||||
path=db.path,
|
||||
qs="mode=ro" if db.is_mutable else "immutable=1",
|
||||
name=db_name,
|
||||
)
|
||||
conn.execute(sql)
|
||||
count += 1
|
||||
|
||||
def add_message(self, request, message, type=INFO):
|
||||
if not hasattr(request, "_messages"):
|
||||
|
|
|
@ -12,7 +12,7 @@ from subprocess import call
|
|||
import sys
|
||||
from runpy import run_module
|
||||
import webbrowser
|
||||
from .app import Datasette, DEFAULT_SETTINGS, SETTINGS, pm
|
||||
from .app import Datasette, DEFAULT_SETTINGS, SETTINGS, SQLITE_LIMIT_ATTACHED, pm
|
||||
from .utils import (
|
||||
StartupError,
|
||||
check_connection,
|
||||
|
@ -410,6 +410,11 @@ def uninstall(packages, yes):
|
|||
is_flag=True,
|
||||
help="Create database files if they do not exist",
|
||||
)
|
||||
@click.option(
|
||||
"--crossdb",
|
||||
is_flag=True,
|
||||
help="Enable cross-database joins using the /_memory database",
|
||||
)
|
||||
@click.option(
|
||||
"--ssl-keyfile",
|
||||
help="SSL key file",
|
||||
|
@ -442,6 +447,7 @@ def serve(
|
|||
pdb,
|
||||
open_browser,
|
||||
create,
|
||||
crossdb,
|
||||
ssl_keyfile,
|
||||
ssl_certfile,
|
||||
return_instance=False,
|
||||
|
@ -499,6 +505,7 @@ def serve(
|
|||
secret=secret,
|
||||
version_note=version_note,
|
||||
pdb=pdb,
|
||||
crossdb=crossdb,
|
||||
)
|
||||
|
||||
# if files is a single directory, use that as config_dir=
|
||||
|
@ -591,3 +598,15 @@ async def check_databases(ds):
|
|||
raise click.UsageError(
|
||||
f"Connection to {database.path} failed check: {str(e.args[0])}"
|
||||
)
|
||||
# If --crossdb and more than SQLITE_LIMIT_ATTACHED show warning
|
||||
if (
|
||||
ds.crossdb
|
||||
and len([db for db in ds.databases.values() if not db.is_memory])
|
||||
> SQLITE_LIMIT_ATTACHED
|
||||
):
|
||||
msg = (
|
||||
"Warning: --crossdb only works with the first {} attached databases".format(
|
||||
SQLITE_LIMIT_ATTACHED
|
||||
)
|
||||
)
|
||||
click.echo(click.style(msg, bold=True, fg="yellow"), err=True)
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import asyncio
|
||||
from collections import namedtuple
|
||||
from pathlib import Path
|
||||
import janus
|
||||
import queue
|
||||
|
@ -22,6 +23,8 @@ from .inspect import inspect_hash
|
|||
|
||||
connections = threading.local()
|
||||
|
||||
AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file"))
|
||||
|
||||
|
||||
class Database:
|
||||
def __init__(
|
||||
|
@ -78,7 +81,7 @@ class Database:
|
|||
conn.execute("PRAGMA query_only=1")
|
||||
return conn
|
||||
if self.is_memory:
|
||||
return sqlite3.connect(":memory:")
|
||||
return sqlite3.connect(":memory:", uri=True)
|
||||
# mode=ro or immutable=1?
|
||||
if self.is_mutable:
|
||||
qs = "?mode=ro"
|
||||
|
@ -243,6 +246,12 @@ class Database:
|
|||
return None
|
||||
return Path(self.path).stat().st_mtime_ns
|
||||
|
||||
async def attached_databases(self):
|
||||
results = await self.execute(
|
||||
"select seq, name, file from pragma_database_list() where seq > 0"
|
||||
)
|
||||
return [AttachedDatabase(*row) for row in results.rows]
|
||||
|
||||
async def table_exists(self, table):
|
||||
results = await self.execute(
|
||||
"select 1 from sqlite_master where type='table' and name=?", params=(table,)
|
||||
|
|
|
@ -56,6 +56,17 @@
|
|||
</form>
|
||||
{% endif %}
|
||||
|
||||
{% if attached_databases %}
|
||||
<div class="message-info">
|
||||
<p>The following databases are attached to this connection, and can be used for cross-database joins:</p>
|
||||
<ul class="bullets">
|
||||
{% for db_name in attached_databases %}
|
||||
<li><strong>{{ db_name }}</strong> - <a href="?sql=select+*+from+[{{ db_name }}].sqlite_master+where+type='table'">tables</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% for table in tables %}
|
||||
{% if show_hidden or not table.hidden %}
|
||||
<div class="db-table">
|
||||
|
|
|
@ -115,6 +115,8 @@ class DatabaseView(DataView):
|
|||
links.extend(extra_links)
|
||||
return links
|
||||
|
||||
attached_databases = [d.name for d in await db.attached_databases()]
|
||||
|
||||
return (
|
||||
{
|
||||
"database": database,
|
||||
|
@ -139,6 +141,7 @@ class DatabaseView(DataView):
|
|||
"allow_download": self.ds.setting("allow_download")
|
||||
and not db.is_mutable
|
||||
and not db.is_memory,
|
||||
"attached_databases": attached_databases,
|
||||
},
|
||||
(f"database-{to_css_class(database)}.html", "database.html"),
|
||||
)
|
||||
|
|
|
@ -41,6 +41,7 @@ Options:
|
|||
--pdb Launch debugger on any errors
|
||||
-o, --open Open Datasette in your web browser
|
||||
--create Create database files if they do not exist
|
||||
--crossdb Enable cross-database joins using the /_memory database
|
||||
--ssl-keyfile TEXT SSL key file
|
||||
--ssl-certfile TEXT SSL certificate file
|
||||
--help Show this message and exit.
|
||||
|
|
|
@ -677,6 +677,9 @@ The ``Database`` class also provides properties and methods for introspecting th
|
|||
``db.is_memory`` - boolean
|
||||
Is this database an in-memory database?
|
||||
|
||||
``await db.attached_databases()`` - list of named tuples
|
||||
Returns a list of additional databases that have been connected to this database using the SQLite ATTACH command. Each named tuple has fields ``seq``, ``name`` and ``file``.
|
||||
|
||||
``await db.table_exists(table)`` - boolean
|
||||
Check if a table called ``table`` exists.
|
||||
|
||||
|
|
|
@ -389,3 +389,34 @@ detect if there should be another page.
|
|||
Since the where clause acts against the index on the primary key, the query is
|
||||
extremely fast even for records that are a long way into the overall pagination
|
||||
set.
|
||||
|
||||
.. _cross_database_queries:
|
||||
|
||||
Cross-database queries
|
||||
----------------------
|
||||
|
||||
SQLite has the ability to run queries that join across multiple databases. Up to ten databases can be attached to a single SQLite connection and queried together.
|
||||
|
||||
Datasette can execute joins across multiple databases if it is started with the ``--crossdb`` option::
|
||||
|
||||
datasette fixtures.db extra_database.db --crossdb
|
||||
|
||||
If it is started in this way, the ``/_memory`` page can be used to execute queries that join across multiple databases.
|
||||
|
||||
References to tables in attached databases should be preceded by the database name and a period.
|
||||
|
||||
For example, this query will show a list of tables across both of the above databases:
|
||||
|
||||
.. code-block:: sql
|
||||
|
||||
select
|
||||
'fixtures' as database, *
|
||||
from
|
||||
[fixtures].sqlite_master
|
||||
union
|
||||
select
|
||||
'extra_database' as database, *
|
||||
from
|
||||
[extra_database].sqlite_master
|
||||
|
||||
`Try that out here <https://latest.datasette.io/_memory?sql=select%0D%0A++%27fixtures%27+as+database%2C+*%0D%0Afrom%0D%0A++%5Bfixtures%5D.sqlite_master%0D%0Aunion%0D%0Aselect%0D%0A++%27extra_database%27+as+database%2C+*%0D%0Afrom%0D%0A++%5Bextra_database%5D.sqlite_master>`__.
|
||||
|
|
|
@ -105,6 +105,7 @@ def make_app_client(
|
|||
static_mounts=None,
|
||||
template_dir=None,
|
||||
metadata=None,
|
||||
crossdb=False,
|
||||
):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
filepath = os.path.join(tmpdir, filename)
|
||||
|
@ -149,6 +150,7 @@ def make_app_client(
|
|||
inspect_data=inspect_data,
|
||||
static_mounts=static_mounts,
|
||||
template_dir=template_dir,
|
||||
crossdb=crossdb,
|
||||
)
|
||||
ds.sqlite_functions.append(("sleep", 1, lambda n: time.sleep(float(n))))
|
||||
yield TestClient(ds)
|
||||
|
@ -180,6 +182,15 @@ def app_client_two_attached_databases():
|
|||
yield client
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def app_client_two_attached_databases_crossdb_enabled():
|
||||
with make_app_client(
|
||||
extra_databases={"extra database.db": EXTRA_DATABASE_SQL},
|
||||
crossdb=True,
|
||||
) as client:
|
||||
yield client
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def app_client_conflicting_database_names():
|
||||
with make_app_client(
|
||||
|
@ -750,7 +761,12 @@ def assert_permissions_checked(datasette, actions):
|
|||
default=False,
|
||||
help="Delete and recreate database if it exists",
|
||||
)
|
||||
def cli(db_filename, metadata, plugins_path, recreate):
|
||||
@click.option(
|
||||
"--extra-db-filename",
|
||||
type=click.Path(file_okay=True, dir_okay=False),
|
||||
help="Write out second test DB to this file",
|
||||
)
|
||||
def cli(db_filename, metadata, plugins_path, recreate, extra_db_filename):
|
||||
"""Write out the fixtures database used by Datasette's test suite"""
|
||||
if metadata and not metadata.endswith(".json"):
|
||||
raise click.ClickException("Metadata should end with .json")
|
||||
|
@ -784,6 +800,17 @@ def cli(db_filename, metadata, plugins_path, recreate):
|
|||
newpath = path / filepath.name
|
||||
newpath.write_text(filepath.open().read())
|
||||
print(f" Wrote plugin: {newpath}")
|
||||
if extra_db_filename:
|
||||
if pathlib.Path(extra_db_filename).exists():
|
||||
if not recreate:
|
||||
raise click.ClickException(
|
||||
f"{extra_db_filename} already exists, use --recreate to reset it"
|
||||
)
|
||||
else:
|
||||
pathlib.Path(extra_db_filename).unlink()
|
||||
conn = sqlite3.connect(extra_db_filename)
|
||||
conn.executescript(EXTRA_DATABASE_SQL)
|
||||
print(f"Test tables written to {extra_db_filename}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -147,6 +147,7 @@ def test_metadata_yaml():
|
|||
get=None,
|
||||
help_config=False,
|
||||
pdb=False,
|
||||
crossdb=False,
|
||||
open_browser=False,
|
||||
create=False,
|
||||
ssl_keyfile=None,
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
from datasette.cli import cli
|
||||
from click.testing import CliRunner
|
||||
import urllib
|
||||
import sqlite3
|
||||
from .fixtures import app_client_two_attached_databases_crossdb_enabled
|
||||
|
||||
|
||||
def test_crossdb_join(app_client_two_attached_databases_crossdb_enabled):
|
||||
app_client = app_client_two_attached_databases_crossdb_enabled
|
||||
sql = """
|
||||
select
|
||||
'extra database' as db,
|
||||
pk,
|
||||
text1,
|
||||
text2
|
||||
from
|
||||
[extra database].searchable
|
||||
union all
|
||||
select
|
||||
'fixtures' as db,
|
||||
pk,
|
||||
text1,
|
||||
text2
|
||||
from
|
||||
fixtures.searchable
|
||||
"""
|
||||
response = app_client.get(
|
||||
"/_memory.json?" + urllib.parse.urlencode({"sql": sql, "_shape": "array"})
|
||||
)
|
||||
assert response.status == 200
|
||||
assert response.json == [
|
||||
{"db": "extra database", "pk": 1, "text1": "barry cat", "text2": "terry dog"},
|
||||
{"db": "extra database", "pk": 2, "text1": "terry dog", "text2": "sara weasel"},
|
||||
{"db": "fixtures", "pk": 1, "text1": "barry cat", "text2": "terry dog"},
|
||||
{"db": "fixtures", "pk": 2, "text1": "terry dog", "text2": "sara weasel"},
|
||||
]
|
||||
|
||||
|
||||
def test_crossdb_warning_if_too_many_databases(tmp_path_factory):
|
||||
db_dir = tmp_path_factory.mktemp("dbs")
|
||||
dbs = []
|
||||
for i in range(11):
|
||||
path = str(db_dir / "db_{}.db".format(i))
|
||||
conn = sqlite3.connect(path)
|
||||
conn.execute("vacuum")
|
||||
dbs.append(path)
|
||||
runner = CliRunner(mix_stderr=False)
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
[
|
||||
"serve",
|
||||
"--crossdb",
|
||||
"--get",
|
||||
"/",
|
||||
]
|
||||
+ dbs,
|
||||
catch_exceptions=False,
|
||||
)
|
||||
assert (
|
||||
"Warning: --crossdb only works with the first 10 attached databases"
|
||||
in result.stderr
|
||||
)
|
||||
|
||||
|
||||
def test_crossdb_attached_database_list_display(
|
||||
app_client_two_attached_databases_crossdb_enabled,
|
||||
):
|
||||
app_client = app_client_two_attached_databases_crossdb_enabled
|
||||
response = app_client.get("/_memory")
|
||||
for fragment in (
|
||||
"databases are attached to this connection",
|
||||
"<li><strong>fixtures</strong> - ",
|
||||
"<li><strong>extra database</strong> - ",
|
||||
):
|
||||
assert fragment in response.text
|
|
@ -4,7 +4,7 @@ Tests for the datasette.database.Database class
|
|||
from datasette.database import Database, Results, MultipleValues
|
||||
from datasette.utils.sqlite import sqlite3, supports_generated_columns
|
||||
from datasette.utils import Column
|
||||
from .fixtures import app_client
|
||||
from .fixtures import app_client, app_client_two_attached_databases_crossdb_enabled
|
||||
import pytest
|
||||
import time
|
||||
import uuid
|
||||
|
@ -466,6 +466,15 @@ def test_is_mutable(app_client):
|
|||
assert Database(app_client.ds, is_memory=True, is_mutable=False).is_mutable is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_attached_databases(app_client_two_attached_databases_crossdb_enabled):
|
||||
database = app_client_two_attached_databases_crossdb_enabled.ds.get_database(
|
||||
"_memory"
|
||||
)
|
||||
attached = await database.attached_databases()
|
||||
assert {a.name for a in attached} == {"extra database", "fixtures"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_database_memory_name(app_client):
|
||||
ds = app_client.ds
|
||||
|
|
Ładowanie…
Reference in New Issue