datasette/datasette/database.py


import asyncio
from collections import namedtuple
from pathlib import Path
import janus
import queue
import sys
import threading
import uuid
from .tracer import trace
from .utils import (
detect_fts,
detect_primary_keys,
detect_spatialite,
get_all_foreign_keys,
get_outbound_foreign_keys,
md5_not_usedforsecurity,
sqlite_timelimit,
sqlite3,
table_columns,
table_column_details,
)
from .inspect import inspect_hash
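# Per-thread cache of read-only connections, keyed by database name
# (populated by Database.execute_fn below).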
connections = threading.local()
AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file"))
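# Wraps a single SQLite database attached to a Datasette instance: file-backed
# or in-memory, mutable or immutable, with reads run in the executor thread
# pool and all writes funnelled through a single dedicated write thread (when
# a thread pool executor is configured). A hedged construction sketch, where
# "ds" is the owning Datasette instance, the file name is illustrative, and
# add_database() is assumed to register the new object with it:
#   db = Database(ds, path="fixtures.db", is_mutable=False)
#   ds.add_database(db)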
class Database:
def __init__(
self,
ds,
path=None,
is_mutable=True,
is_memory=False,
memory_name=None,
mode=None,
):
self.name = None
self.route = None
self.ds = ds
self.path = path
self.is_mutable = is_mutable
self.is_memory = is_memory
self.memory_name = memory_name
if memory_name is not None:
self.is_memory = True
self.cached_hash = None
self.cached_size = None
self._cached_table_counts = None
self._write_thread = None
self._write_queue = None
# These are used when in non-threaded mode:
self._read_connection = None
self._write_connection = None
# This is used to track all file connections so they can be closed
self._all_file_connections = []
self.mode = mode
@property
def cached_table_counts(self):
if self._cached_table_counts is not None:
return self._cached_table_counts
# Maybe use self.ds.inspect_data to populate cached_table_counts
if self.ds.inspect_data and self.ds.inspect_data.get(self.name):
self._cached_table_counts = {
key: value["count"]
for key, value in self.ds.inspect_data[self.name]["tables"].items()
}
return self._cached_table_counts
@property
def color(self):
if self.hash:
return self.hash[:6]
return md5_not_usedforsecurity(self.name)[:6]
def suggest_name(self):
if self.path:
return Path(self.path).stem
elif self.memory_name:
return self.memory_name
else:
return "db"
def connect(self, write=False):
if self.memory_name:
uri = "file:{}?mode=memory&cache=shared".format(self.memory_name)
conn = sqlite3.connect(
uri,
uri=True,
check_same_thread=False,
)
if not write:
conn.execute("PRAGMA query_only=1")
return conn
if self.is_memory:
return sqlite3.connect(":memory:", uri=True)
# mode=ro or immutable=1?
if self.is_mutable:
qs = "?mode=ro"
if self.ds.nolock:
qs += "&nolock=1"
else:
qs = "?immutable=1"
assert not (write and not self.is_mutable)
if write:
qs = ""
if self.mode is not None:
qs = f"?mode={self.mode}"
conn = sqlite3.connect(
f"file:{self.path}{qs}", uri=True, check_same_thread=False
)
self._all_file_connections.append(conn)
return conn
def close(self):
# Close all connections - useful to avoid running out of file handles in tests
for connection in self._all_file_connections:
connection.close()
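    # execute_write(), execute_write_script() and execute_write_many() are
    # convenience wrappers that all funnel through execute_write_fn(). A
    # hedged usage sketch (table and column names are illustrative only):
    #   await db.execute_write(
    #       "insert into logs (message) values (?)", ["hello"]
    #   )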
async def execute_write(self, sql, params=None, block=True):
def _inner(conn):
return conn.execute(sql, params or [])
with trace("sql", database=self.name, sql=sql.strip(), params=params):
results = await self.execute_write_fn(_inner, block=block)
return results
async def execute_write_script(self, sql, block=True):
def _inner(conn):
return conn.executescript(sql)
with trace("sql", database=self.name, sql=sql.strip(), executescript=True):
results = await self.execute_write_fn(_inner, block=block)
return results
async def execute_write_many(self, sql, params_seq, block=True):
def _inner(conn):
count = 0
def count_params(params):
nonlocal count
for param in params:
count += 1
yield param
return conn.executemany(sql, count_params(params_seq)), count
with trace(
"sql", database=self.name, sql=sql.strip(), executemany=True
) as kwargs:
results, count = await self.execute_write_fn(_inner, block=block)
kwargs["count"] = count
return results
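    # A hedged usage sketch for execute_isolated_fn(), which suits operations
    # that must not share a connection or transaction with queued writes, for
    # example a VACUUM (the SQL here is illustrative only):
    #   await db.execute_isolated_fn(lambda conn: conn.execute("VACUUM"))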
async def execute_isolated_fn(self, fn):
# Open a new connection just for the duration of this function
# blocking the write queue to avoid any writes occurring during it
if self.ds.executor is None:
# non-threaded mode
isolated_connection = self.connect(write=True)
try:
result = fn(isolated_connection)
finally:
isolated_connection.close()
try:
self._all_file_connections.remove(isolated_connection)
except ValueError:
# Was probably a memory connection
pass
return result
else:
# Threaded mode - send to write thread
return await self._send_to_write_thread(fn, isolated_connection=True)
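    # execute_write_fn() runs fn(conn) against this database's single write
    # connection, inside a transaction unless transaction=False. With
    # block=False the call returns a task id immediately instead of waiting
    # for the result. A hedged usage sketch (table name is illustrative only):
    #   def create_table(conn):
    #       conn.execute("create table if not exists notes (id integer primary key, body text)")
    #   await db.execute_write_fn(create_table)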
async def execute_write_fn(self, fn, block=True, transaction=True):
if self.ds.executor is None:
# non-threaded mode
if self._write_connection is None:
self._write_connection = self.connect(write=True)
self.ds._prepare_connection(self._write_connection, self.name)
if transaction:
with self._write_connection:
return fn(self._write_connection)
else:
return fn(self._write_connection)
else:
return await self._send_to_write_thread(
fn, block=block, transaction=transaction
)
async def _send_to_write_thread(
self, fn, block=True, isolated_connection=False, transaction=True
):
if self._write_queue is None:
self._write_queue = queue.Queue()
if self._write_thread is None:
self._write_thread = threading.Thread(
target=self._execute_writes, daemon=True
)
self._write_thread.name = "_execute_writes for database {}".format(
self.name
)
self._write_thread.start()
task_id = uuid.uuid5(uuid.NAMESPACE_DNS, "datasette.io")
reply_queue = janus.Queue()
self._write_queue.put(
WriteTask(fn, task_id, reply_queue, isolated_connection, transaction)
)
if block:
result = await reply_queue.async_q.get()
if isinstance(result, Exception):
raise result
else:
return result
else:
return task_id
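    # _execute_writes() is the body of the daemon write thread: it owns the
    # single write connection, pulls WriteTask items off _write_queue forever
    # and sends each result (or the exception raised) back through the task's
    # reply_queue.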
def _execute_writes(self):
# Infinite looping thread that protects the single write connection
# to this database
conn_exception = None
conn = None
try:
conn = self.connect(write=True)
self.ds._prepare_connection(conn, self.name)
except Exception as e:
conn_exception = e
while True:
task = self._write_queue.get()
if conn_exception is not None:
result = conn_exception
else:
if task.isolated_connection:
isolated_connection = self.connect(write=True)
try:
result = task.fn(isolated_connection)
except Exception as e:
sys.stderr.write("{}\n".format(e))
sys.stderr.flush()
result = e
finally:
isolated_connection.close()
try:
self._all_file_connections.remove(isolated_connection)
except ValueError:
# Was probably a memory connection
pass
else:
try:
if task.transaction:
with conn:
result = task.fn(conn)
else:
result = task.fn(conn)
except Exception as e:
sys.stderr.write("{}\n".format(e))
sys.stderr.flush()
result = e
task.reply_queue.sync_q.put(result)
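    # execute_fn() runs a read-only fn(conn) in the shared thread pool
    # executor, reusing a per-thread connection cached on the module-level
    # "connections" thread-local.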
async def execute_fn(self, fn):
if self.ds.executor is None:
# non-threaded mode
if self._read_connection is None:
self._read_connection = self.connect()
self.ds._prepare_connection(self._read_connection, self.name)
return fn(self._read_connection)
# threaded mode
def in_thread():
conn = getattr(connections, self.name, None)
if not conn:
conn = self.connect()
self.ds._prepare_connection(conn, self.name)
setattr(connections, self.name, conn)
return fn(conn)
return await asyncio.get_event_loop().run_in_executor(
self.ds.executor, in_thread
)
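    # A hedged usage sketch for execute() (table and column names are
    # illustrative only):
    #   results = await db.execute(
    #       "select id, title from articles where id = :id", {"id": 1}
    #   )
    #   first_row = results.first()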
async def execute(
self,
sql,
params=None,
truncate=False,
custom_time_limit=None,
page_size=None,
log_sql_errors=True,
):
"""Executes sql against db_name in a thread"""
page_size = page_size or self.ds.page_size
def sql_operation_in_thread(conn):
time_limit_ms = self.ds.sql_time_limit_ms
if custom_time_limit and custom_time_limit < time_limit_ms:
time_limit_ms = custom_time_limit
with sqlite_timelimit(conn, time_limit_ms):
try:
cursor = conn.cursor()
cursor.execute(sql, params if params is not None else {})
max_returned_rows = self.ds.max_returned_rows
if max_returned_rows == page_size:
max_returned_rows += 1
if max_returned_rows and truncate:
rows = cursor.fetchmany(max_returned_rows + 1)
truncated = len(rows) > max_returned_rows
rows = rows[:max_returned_rows]
else:
rows = cursor.fetchall()
truncated = False
except (sqlite3.OperationalError, sqlite3.DatabaseError) as e:
if e.args == ("interrupted",):
raise QueryInterrupted(e, sql, params)
if log_sql_errors:
sys.stderr.write(
"ERROR: conn={}, sql = {}, params = {}: {}\n".format(
conn, repr(sql), params, e
)
)
sys.stderr.flush()
raise
if truncate:
return Results(rows, truncated, cursor.description)
else:
return Results(rows, False, cursor.description)
with trace("sql", database=self.name, sql=sql.strip(), params=params):
results = await self.execute_fn(sql_operation_in_thread)
return results
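    # hash is None for mutable and in-memory databases and cached for
    # immutable ones; size is 0 for in-memory databases, re-read from disk on
    # each access for mutable ones, and cached for immutable ones.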
@property
def hash(self):
if self.cached_hash is not None:
return self.cached_hash
elif self.is_mutable or self.is_memory:
return None
elif self.ds.inspect_data and self.ds.inspect_data.get(self.name):
self.cached_hash = self.ds.inspect_data[self.name]["hash"]
return self.cached_hash
else:
p = Path(self.path)
self.cached_hash = inspect_hash(p)
return self.cached_hash
@property
def size(self):
if self.cached_size is not None:
return self.cached_size
elif self.is_memory:
return 0
elif self.is_mutable:
return Path(self.path).stat().st_size
elif self.ds.inspect_data and self.ds.inspect_data.get(self.name):
self.cached_size = self.ds.inspect_data[self.name]["size"]
return self.cached_size
else:
self.cached_size = Path(self.path).stat().st_size
return self.cached_size
async def table_counts(self, limit=10):
if not self.is_mutable and self.cached_table_counts is not None:
return self.cached_table_counts
# Try to get counts for each table, $limit timeout for each count
counts = {}
for table in await self.table_names():
try:
table_count = (
await self.execute(
f"select count(*) from [{table}]",
custom_time_limit=limit,
)
).rows[0][0]
counts[table] = table_count
# In some cases I saw "SQL Logic Error" here in addition to
# QueryInterrupted - so we catch that too:
except (QueryInterrupted, sqlite3.OperationalError, sqlite3.DatabaseError):
counts[table] = None
if not self.is_mutable:
self._cached_table_counts = counts
return counts
@property
def mtime_ns(self):
if self.is_memory:
return None
return Path(self.path).stat().st_mtime_ns
async def attached_databases(self):
# This used to be:
# select seq, name, file from pragma_database_list() where seq > 0
# But SQLite prior to 3.16.0 doesn't support pragma functions
results = await self.execute("PRAGMA database_list;")
# {'seq': 0, 'name': 'main', 'file': ''}
return [AttachedDatabase(*row) for row in results.rows if row["seq"] > 0]
async def table_exists(self, table):
results = await self.execute(
"select 1 from sqlite_master where type='table' and name=?", params=(table,)
)
return bool(results.rows)
async def view_exists(self, table):
results = await self.execute(
"select 1 from sqlite_master where type='view' and name=?", params=(table,)
)
return bool(results.rows)
async def table_names(self):
results = await self.execute(
"select name from sqlite_master where type='table'"
)
return [r[0] for r in results.rows]
async def table_columns(self, table):
return await self.execute_fn(lambda conn: table_columns(conn, table))
async def table_column_details(self, table):
return await self.execute_fn(lambda conn: table_column_details(conn, table))
async def primary_keys(self, table):
return await self.execute_fn(lambda conn: detect_primary_keys(conn, table))
async def fts_table(self, table):
return await self.execute_fn(lambda conn: detect_fts(conn, table))
async def label_column_for_table(self, table):
explicit_label_column = (await self.ds.table_config(self.name, table)).get(
"label_column"
)
if explicit_label_column:
return explicit_label_column
column_names = await self.execute_fn(lambda conn: table_columns(conn, table))
# Is there a name or title column?
name_or_title = [c for c in column_names if c.lower() in ("name", "title")]
if name_or_title:
return name_or_title[0]
# If a table has two columns, one of which is ID, then label_column is the other one
if (
column_names
and len(column_names) == 2
and ("id" in column_names or "pk" in column_names)
):
return [c for c in column_names if c not in ("id", "pk")][0]
# Couldn't find a label:
return None
async def foreign_keys_for_table(self, table):
return await self.execute_fn(
lambda conn: get_outbound_foreign_keys(conn, table)
)
async def hidden_table_names(self):
# Mark tables 'hidden' if they relate to FTS virtual tables
hidden_tables = [
r[0]
for r in (
await self.execute(
"""
select name from sqlite_master
where rootpage = 0
and (
sql like '%VIRTUAL TABLE%USING FTS%'
) or name in ('sqlite_stat1', 'sqlite_stat2', 'sqlite_stat3', 'sqlite_stat4')
"""
)
).rows
]
has_spatialite = await self.execute_fn(detect_spatialite)
if has_spatialite:
# Also hide Spatialite internal tables
hidden_tables += [
"ElementaryGeometries",
"SpatialIndex",
"geometry_columns",
"spatial_ref_sys",
"spatialite_history",
"sql_statements_log",
"sqlite_sequence",
"views_geometry_columns",
"virts_geometry_columns",
"data_licenses",
"KNN",
"KNN2",
] + [
r[0]
for r in (
await self.execute(
"""
select name from sqlite_master
where name like "idx_%"
and type = "table"
"""
)
).rows
]
# Add any tables marked as hidden in config
db_config = self.ds.config.get("databases", {}).get(self.name, {})
if "tables" in db_config:
hidden_tables += [
t for t in db_config["tables"] if db_config["tables"][t].get("hidden")
]
# Also mark as hidden any tables which start with the name of a hidden table
# e.g. "searchable_fts" implies "searchable_fts_content" should be hidden
for table_name in await self.table_names():
for hidden_table in hidden_tables[:]:
if table_name.startswith(hidden_table):
hidden_tables.append(table_name)
continue
return hidden_tables
async def view_names(self):
results = await self.execute("select name from sqlite_master where type='view'")
return [r[0] for r in results.rows]
async def get_all_foreign_keys(self):
return await self.execute_fn(get_all_foreign_keys)
async def get_table_definition(self, table, type_="table"):
table_definition_rows = list(
await self.execute(
"select sql from sqlite_master where name = :n and type=:t",
{"n": table, "t": type_},
)
)
if not table_definition_rows:
return None
bits = [table_definition_rows[0][0] + ";"]
# Add on any indexes
index_rows = list(
await self.execute(
"select sql from sqlite_master where tbl_name = :n and type='index' and sql is not null",
{"n": table},
)
)
for index_row in index_rows:
bits.append(index_row[0] + ";")
return "\n".join(bits)
async def get_view_definition(self, view):
return await self.get_table_definition(view, "view")
def __repr__(self):
tags = []
if self.is_mutable:
tags.append("mutable")
if self.is_memory:
tags.append("memory")
if self.hash:
tags.append(f"hash={self.hash}")
if self.size is not None:
tags.append(f"size={self.size}")
tags_str = ""
if tags:
tags_str = f" ({', '.join(tags)})"
return f"<Database: {self.name}{tags_str}>"
class WriteTask:
__slots__ = ("fn", "task_id", "reply_queue", "isolated_connection", "transaction")
def __init__(self, fn, task_id, reply_queue, isolated_connection, transaction):
self.fn = fn
self.task_id = task_id
self.reply_queue = reply_queue
self.isolated_connection = isolated_connection
self.transaction = transaction
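# Raised by Database.execute() when SQLite reports that a query was
# interrupted, typically because it exceeded the configured time limit.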
class QueryInterrupted(Exception):
def __init__(self, e, sql, params):
self.e = e
self.sql = sql
self.params = params
class MultipleValues(Exception):
pass
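# Wraps the rows returned by Database.execute(): exposes .rows, .truncated
# and .columns, plus first() and single_value() helpers. A hedged usage
# sketch:
#   results = await db.execute("select count(*) from sqlite_master")
#   count = results.single_value()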
class Results:
def __init__(self, rows, truncated, description):
self.rows = rows
self.truncated = truncated
self.description = description
@property
def columns(self):
return [d[0] for d in self.description]
def first(self):
if self.rows:
return self.rows[0]
else:
return None
def single_value(self):
if self.rows and 1 == len(self.rows) and 1 == len(self.rows[0]):
return self.rows[0][0]
else:
raise MultipleValues
def __iter__(self):
return iter(self.rows)
def __len__(self):
return len(self.rows)