From 25014ca25eb70b4c1217558ebd14db2845973bfb Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 27 Apr 2020 09:30:24 -0700 Subject: [PATCH] Configuration directory mode, closes #731 --- datasette/app.py | 34 ++++++++-- datasette/cli.py | 13 +++- docs/conf.py | 3 + docs/config.rst | 93 +++++++++++++++++++++------- docs/custom_templates.rst | 2 + docs/plugins.rst | 12 ++-- tests/test_config_dir.py | 126 ++++++++++++++++++++++++++++++++++++++ tests/test_docs.py | 2 +- 8 files changed, 251 insertions(+), 34 deletions(-) create mode 100644 tests/test_config_dir.py diff --git a/datasette/app.py b/datasette/app.py index 221e862c..3c7644a6 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -164,16 +164,32 @@ class Datasette: memory=False, config=None, version_note=None, + config_dir=None, ): - immutables = immutables or [] - self.files = tuple(files) + tuple(immutables) - self.immutables = set(immutables) + assert config_dir is None or isinstance( + config_dir, Path + ), "config_dir= should be a pathlib.Path" + self.files = tuple(files) + tuple(immutables or []) + if config_dir: + self.files += tuple([str(p) for p in config_dir.glob("*.db")]) + if ( + config_dir + and (config_dir / "inspect-data.json").exists() + and not inspect_data + ): + inspect_data = json.load((config_dir / "inspect-data.json").open()) + if immutables is None: + immutable_filenames = [i["file"] for i in inspect_data.values()] + immutables = [ + f for f in self.files if Path(f).name in immutable_filenames + ] + self.inspect_data = inspect_data + self.immutables = set(immutables or []) if not self.files: self.files = [MEMORY] elif memory: self.files = (MEMORY,) + self.files self.databases = collections.OrderedDict() - self.inspect_data = inspect_data for file in self.files: path = file is_memory = False @@ -187,12 +203,22 @@ class Datasette: self.add_database(db.name, db) self.cache_headers = cache_headers self.cors = cors + if config_dir and (config_dir / "metadata.json").exists() and not 
metadata: + metadata = json.load((config_dir / "metadata.json").open()) self._metadata = metadata or {} self.sqlite_functions = [] self.sqlite_extensions = sqlite_extensions or [] + if config_dir and (config_dir / "templates").is_dir() and not template_dir: + template_dir = str((config_dir / "templates").resolve()) self.template_dir = template_dir + if config_dir and (config_dir / "plugins").is_dir() and not plugins_dir: + plugins_dir = str((config_dir / "plugins").resolve()) self.plugins_dir = plugins_dir + if config_dir and (config_dir / "static").is_dir() and not static_mounts: + static_mounts = [("static", str((config_dir / "static").resolve()))] self.static_mounts = static_mounts or [] + if config_dir and (config_dir / "config.json").exists() and not config: + config = json.load((config_dir / "config.json").open()) self._config = dict(DEFAULT_CONFIG, **(config or {})) self.renderers = {} # File extension -> renderer function self.version_note = version_note diff --git a/datasette/cli.py b/datasette/cli.py index 6e8f0d9b..a93eaf79 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -5,6 +5,7 @@ from click import formatting from click_default_group import DefaultGroup import json import os +import pathlib import shutil from subprocess import call import sys @@ -352,8 +353,8 @@ def serve( click.echo( "Serve! 
files={} (immutables={}) on port {}".format(files, immutable, port) ) - ds = Datasette( - files, + + kwargs = dict( immutables=immutable, cache_headers=not debug and not reload, cors=cors, @@ -367,6 +368,14 @@ def serve( memory=memory, version_note=version_note, ) + + # if files is a single directory, use that as config_dir= + if 1 == len(files) and os.path.isdir(files[0]): + kwargs["config_dir"] = pathlib.Path(files[0]) + files = [] + + ds = Datasette(files, **kwargs) + if return_instance: # Private utility mechanism for writing unit tests return ds diff --git a/docs/conf.py b/docs/conf.py index 38fce912..5e0bb328 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -50,6 +50,9 @@ project = "Datasette" copyright = "2017-2020, Simon Willison" author = "Simon Willison" +# Disable -- turning into – +smartquotes = False + # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. diff --git a/docs/config.rst b/docs/config.rst index 4b12e74b..4859788c 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -3,25 +3,72 @@ Configuration ============= +Using \-\-config +---------------- + Datasette provides a number of configuration options. These can be set using the ``--config name:value`` option to ``datasette serve``. You can set multiple configuration options at once like this:: - datasette mydatabase.db --config default_page_size:50 \ + datasette mydatabase.db \ + --config default_page_size:50 \ --config sql_time_limit_ms:3500 \ --config max_returned_rows:2000 -To prevent rogue, long-running queries from making a Datasette instance inaccessible to other users, Datasette imposes some limits on the SQL that you can execute. These are exposed as config options which you can over-ride. + +.. _config_dir: + +Configuration directory mode +---------------------------- + +Normally you configure Datasette using command-line options. 
For a Datasette instance with custom templates, custom plugins, a static directory and several databases this can get quite verbose:: + + $ datasette one.db two.db \ + --metadata metadata.json \ + --template-dir=templates/ \ + --plugins-dir=plugins \ + --static css:css + +As an alternative to this, you can run Datasette in *configuration directory* mode. Create a directory with the following structure:: + + # In a directory called my-app: + my-app/one.db + my-app/two.db + my-app/metadata.json + my-app/templates/index.html + my-app/plugins/my_plugin.py + my-app/static/my.css + +Now start Datasette by providing the path to that directory:: + + $ datasette my-app/ + +Datasette will detect the files in that directory and automatically configure itself using them. It will serve all ``*.db`` files that it finds, will load ``metadata.json`` if it exists, and will load the ``templates``, ``plugins`` and ``static`` folders if they are present. + +The files that can be included in this directory are as follows. All are optional. + +* ``*.db`` - SQLite database files that will be served by Datasette +* ``metadata.json`` - :ref:`metadata` for those databases +* ``inspect-data.json`` - the result of running ``datasette inspect`` - any database files listed here will be treated as immutable, so they should not be changed while Datasette is running +* ``config.json`` - settings that would normally be passed using ``--config`` - here they should be stored as a JSON object of key/value pairs +* ``templates/`` - a directory containing :ref:`customization_custom_templates` +* ``plugins/`` - a directory containing plugins, see :ref:`plugins_writing_one_off` +* ``static/`` - a directory containing static files - these will be served from ``/static/filename.txt``, see :ref:`customization_static_files` + +Configuration options +--------------------- + +The following options can be set using ``--config name:value``, or by storing them in the ``config.json`` file for use with :ref:`config_dir`. 
default_page_size ------------------ +~~~~~~~~~~~~~~~~~ The default number of rows returned by the table page. You can over-ride this on a per-page basis using the ``?_size=80`` querystring parameter, provided you do not specify a value higher than the ``max_returned_rows`` setting. You can set this default using ``--config`` like so:: datasette mydatabase.db --config default_page_size:50 sql_time_limit_ms ------------------ +~~~~~~~~~~~~~~~~~ By default, queries have a time limit of one second. If a query takes longer than this to run Datasette will terminate the query and return an error. @@ -38,7 +85,7 @@ This would set the time limit to 100ms for that specific query. This feature is .. _config_max_returned_rows: max_returned_rows ------------------ +~~~~~~~~~~~~~~~~~ Datasette returns a maximum of 1,000 rows of data at a time. If you execute a query that returns more than 1,000 rows, Datasette will return the first 1,000 and include a warning that the result set has been truncated. You can use OFFSET/LIMIT or other methods in your SQL to implement pagination if you need to return more than 1,000 rows. @@ -47,7 +94,7 @@ You can increase or decrease this limit like so:: datasette mydatabase.db --config max_returned_rows:2000 num_sql_threads ---------------- +~~~~~~~~~~~~~~~ Maximum number of threads in the thread pool Datasette uses to execute SQLite queries. Defaults to 3. @@ -56,7 +103,7 @@ Maximum number of threads in the thread pool Datasette uses to execute SQLite qu datasette mydatabase.db --config num_sql_threads:10 allow_facet ------------ +~~~~~~~~~~~ Allow users to specify columns they would like to facet on using the ``?_facet=COLNAME`` URL parameter to the table view. @@ -67,21 +114,21 @@ Here's how to disable this feature:: datasette mydatabase.db --config allow_facet:off default_facet_size ------------------- +~~~~~~~~~~~~~~~~~~ The default number of unique rows returned by :ref:`facets` is 30. 
You can customize it like this:: datasette mydatabase.db --config default_facet_size:50 facet_time_limit_ms -------------------- +~~~~~~~~~~~~~~~~~~~ This is the time limit Datasette allows for calculating a facet, which defaults to 200ms:: datasette mydatabase.db --config facet_time_limit_ms:1000 facet_suggest_time_limit_ms ---------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~ When Datasette calculates suggested facets it needs to run a SQL query for every column in your table. The default for this time limit is 50ms to account for the fact that it needs to run once for every column. If the time limit is exceeded the column will not be suggested as a facet. @@ -90,14 +137,14 @@ You can increase this time limit like so:: datasette mydatabase.db --config facet_suggest_time_limit_ms:500 suggest_facets --------------- +~~~~~~~~~~~~~~ Should Datasette calculate suggested facets? On by default, turn this off like so:: datasette mydatabase.db --config suggest_facets:off allow_download --------------- +~~~~~~~~~~~~~~ Should users be able to download the original SQLite database using a link on the database index page? This is turned on by default - to disable database downloads, use the following:: @@ -106,7 +153,7 @@ Should users be able to download the original SQLite database using a link on th .. _config_allow_sql: allow_sql ---------- +~~~~~~~~~ Enable/disable the ability for users to run custom SQL directly against a database. To disable this feature, run:: @@ -115,7 +162,7 @@ Enable/disable the ability for users to run custom SQL directly against a databa .. _config_default_cache_ttl: default_cache_ttl ------------------ +~~~~~~~~~~~~~~~~~ Default HTTP caching max-age header in seconds, used for ``Cache-Control: max-age=X``. Can be over-ridden on a per-request basis using the ``?_ttl=`` querystring parameter. Set this to ``0`` to disable HTTP caching entirely. Defaults to 5 seconds. 
@@ -126,7 +173,7 @@ Default HTTP caching max-age header in seconds, used for ``Cache-Control: max-ag .. _config_default_cache_ttl_hashed: default_cache_ttl_hashed ------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~ Default HTTP caching max-age for responses served using using the :ref:`hashed-urls mechanism `. Defaults to 365 days (31536000 seconds). @@ -136,7 +183,7 @@ Default HTTP caching max-age for responses served using using the :ref:`hashed-u cache_size_kb -------------- +~~~~~~~~~~~~~ Sets the amount of memory SQLite uses for its `per-connection cache `_, in KB. @@ -147,7 +194,7 @@ Sets the amount of memory SQLite uses for its `per-connection cache ` where an entire table (potentially hundreds of thousands of rows) can be exported as a single CSV @@ -160,7 +207,7 @@ file. This is turned on by default - you can turn it off like this: .. _config_max_csv_mb: max_csv_mb ----------- +~~~~~~~~~~ The maximum size of CSV that can be exported, in megabytes. Defaults to 100MB. You can disable the limit entirely by settings this to 0: @@ -172,7 +219,7 @@ You can disable the limit entirely by settings this to 0: .. _config_truncate_cells_html: truncate_cells_html -------------------- +~~~~~~~~~~~~~~~~~~~ In the HTML table view, truncate any strings that are longer than this value. The full value will still be available in CSV, JSON and on the individual row @@ -184,7 +231,7 @@ HTML page. Set this to 0 to disable truncation. force_https_urls ----------------- +~~~~~~~~~~~~~~~~ Forces self-referential URLs in the JSON output to always use the ``https://`` protocol. This is useful for cases where the application itself is hosted using @@ -197,7 +244,7 @@ HTTP but is served to the outside world via a proxy that enables HTTPS. .. _config_hash_urls: hash_urls ---------- +~~~~~~~~~ When enabled, this setting causes Datasette to append a content hash of the database file to the URL path for every table and query within that database. 
@@ -213,7 +260,7 @@ itself will result in new, uncachcacheed URL paths. .. _config_template_debug: template_debug --------------- +~~~~~~~~~~~~~~ This setting enables template context debug mode, which is useful to help understand what variables are available to custom templates when you are writing them. @@ -232,7 +279,7 @@ Some examples: .. _config_base_url: base_url --------- +~~~~~~~~ If you are running Datasette behind a proxy, it may be useful to change the root URL used for the Datasette instance. diff --git a/docs/custom_templates.rst b/docs/custom_templates.rst index 4a300e82..142ecc97 100644 --- a/docs/custom_templates.rst +++ b/docs/custom_templates.rst @@ -109,6 +109,8 @@ database column they are representing, for example:: +.. _customization_static_files: + Serving static files ~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/plugins.rst b/docs/plugins.rst index 5f3eed0d..59d39a62 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -27,8 +27,10 @@ Things you can do with plugins include: or implement authentication, for example `datasette-auth-github `__. -Using plugins -------------- +.. _plugins_installing: + +Installing plugins +------------------ If a plugin has been packaged for distribution using setuptools you can use the plugin by installing it alongside Datasette in the same virtual @@ -48,8 +50,10 @@ install`` such as a URL to a ``.zip`` file:: --install=datasette-plugin-demos \ --install=https://url-to-my-package.zip -Writing plugins ---------------- +.. _plugins_writing_one_off: + +Writing one-off plugins +----------------------- The easiest way to write a plugin is to create a ``my_plugin.py`` file and drop it into your ``plugins/`` directory. 
Here is an example plugin, which diff --git a/tests/test_config_dir.py b/tests/test_config_dir.py new file mode 100644 index 00000000..24c96a2b --- /dev/null +++ b/tests/test_config_dir.py @@ -0,0 +1,126 @@ +import json +import pytest +import sqlite3 + +from datasette.app import Datasette +from .fixtures import TestClient + +PLUGIN = """ +from datasette import hookimpl + +@hookimpl +def extra_template_vars(): + print("this is template vars") + return { + "from_plugin": "hooray" + } +""" +METADATA = {"title": "This is from metadata"} +CONFIG = { + "default_cache_ttl": 60, + "allow_sql": False, +} +CSS = """ +body { margin-top: 3em} +""" + + +@pytest.fixture(scope="session") +def config_dir_client(tmp_path_factory): + config_dir = tmp_path_factory.mktemp("config-dir") + + plugins_dir = config_dir / "plugins" + plugins_dir.mkdir() + (plugins_dir / "hooray.py").write_text(PLUGIN, "utf-8") + + templates_dir = config_dir / "templates" + templates_dir.mkdir() + (templates_dir / "row.html").write_text( + "Show row here. 
Plugin says {{ from_plugin }}", "utf-8" + ) + + static_dir = config_dir / "static" + static_dir.mkdir() + (static_dir / "hello.css").write_text(CSS, "utf-8") + + (config_dir / "metadata.json").write_text(json.dumps(METADATA), "utf-8") + (config_dir / "config.json").write_text(json.dumps(CONFIG), "utf-8") + + for dbname in ("demo.db", "immutable.db"): + db = sqlite3.connect(str(config_dir / dbname)) + db.executescript( + """ + CREATE TABLE cities ( + id integer primary key, + name text + ); + INSERT INTO cities (id, name) VALUES + (1, 'San Francisco') + ; + """ + ) + + # Mark "immutable.db" as immutable + (config_dir / "inspect-data.json").write_text( + json.dumps( + { + "immutable": { + "hash": "hash", + "size": 8192, + "file": "immutable.db", + "tables": {"cities": {"count": 1}}, + } + } + ), + "utf-8", + ) + + ds = Datasette([], config_dir=config_dir) + client = TestClient(ds.app()) + client.ds = ds + yield client + + +def test_metadata(config_dir_client): + response = config_dir_client.get("/-/metadata.json") + assert 200 == response.status + assert METADATA == json.loads(response.text) + + +def test_config(config_dir_client): + response = config_dir_client.get("/-/config.json") + assert 200 == response.status + config = json.loads(response.text) + assert 60 == config["default_cache_ttl"] + assert not config["allow_sql"] + + +def test_plugins(config_dir_client): + response = config_dir_client.get("/-/plugins.json") + assert 200 == response.status + assert "hooray.py" in {p["name"] for p in json.loads(response.text)} + + +def test_templates_and_plugin(config_dir_client): + response = config_dir_client.get("/demo/cities/1") + assert 200 == response.status + assert "Show row here. 
Plugin says hooray" == response.text + + +def test_static(config_dir_client): + response = config_dir_client.get("/static/hello.css") + assert 200 == response.status + assert CSS == response.text + assert "text/css" == response.headers["content-type"] + + +def test_databases(config_dir_client): + response = config_dir_client.get("/-/databases.json") + assert 200 == response.status + databases = json.loads(response.text) + assert 2 == len(databases) + databases.sort(key=lambda d: d["name"]) + assert "demo" == databases[0]["name"] + assert databases[0]["is_mutable"] + assert "immutable" == databases[1]["name"] + assert not databases[1]["is_mutable"] diff --git a/tests/test_docs.py b/tests/test_docs.py index 57fa5a68..d7c5a534 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -26,7 +26,7 @@ def get_labels(filename): @pytest.mark.parametrize("config", app.CONFIG_OPTIONS) def test_config_options_are_documented(config): - assert config.name in get_headings("config.rst") + assert config.name in get_headings("config.rst", "~") @pytest.mark.parametrize(