Consider just 1000 rows for suggest facet, closes #2406

main
Simon Willison 2024-08-21 13:36:42 -07:00
rodzic 8a63cdccc7
commit f28ff8e4f0
2 zmienionych plików z 65 dodań i 19 usunięć

Wyświetl plik

@ -65,6 +65,8 @@ def register_facet_classes():
class Facet:
type = None
# How many rows to consider when suggesting facets:
suggest_consider = 1000
def __init__(
self,
@ -145,17 +147,6 @@ class Facet:
)
).columns
async def get_row_count(self):
if self.row_count is None:
self.row_count = (
await self.ds.execute(
self.database,
f"select count(*) from ({self.sql})",
self.params,
)
).rows[0][0]
return self.row_count
class ColumnFacet(Facet):
type = "column"
@ -170,13 +161,16 @@ class ColumnFacet(Facet):
if column in already_enabled:
continue
suggested_facet_sql = """
select {column} as value, count(*) as n from (
{sql}
) where value is not null
with limited as (select * from ({sql}) limit {suggest_consider})
select {column} as value, count(*) as n from limited
where value is not null
group by value
limit {limit}
""".format(
column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1
column=escape_sqlite(column),
sql=self.sql,
limit=facet_size + 1,
suggest_consider=self.suggest_consider,
)
distinct_values = None
try:
@ -211,6 +205,17 @@ class ColumnFacet(Facet):
continue
return suggested_facets
async def get_row_count(self):
if self.row_count is None:
self.row_count = (
await self.ds.execute(
self.database,
f"select count(*) from (select * from ({self.sql}) limit {self.suggest_consider})",
self.params,
)
).rows[0][0]
return self.row_count
async def facet_results(self):
facet_results = []
facets_timed_out = []
@ -313,11 +318,14 @@ class ArrayFacet(Facet):
continue
# Is every value in this column either null or a JSON array?
suggested_facet_sql = """
with limited as (select * from ({sql}) limit {suggest_consider})
select distinct json_type({column})
from ({sql})
from limited
where {column} is not null and {column} != ''
""".format(
column=escape_sqlite(column), sql=self.sql
column=escape_sqlite(column),
sql=self.sql,
suggest_consider=self.suggest_consider,
)
try:
results = await self.ds.execute(
@ -402,7 +410,9 @@ class ArrayFacet(Facet):
order by
count(*) desc, value limit {limit}
""".format(
col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1
col=escape_sqlite(column),
sql=self.sql,
limit=facet_size + 1,
)
try:
facet_rows_results = await self.ds.execute(

Wyświetl plik

@ -1,6 +1,6 @@
from datasette.app import Datasette
from datasette.database import Database
from datasette.facets import ColumnFacet, ArrayFacet, DateFacet
from datasette.facets import Facet, ColumnFacet, ArrayFacet, DateFacet
from datasette.utils.asgi import Request
from datasette.utils import detect_json1
from .fixtures import make_app_client
@ -662,3 +662,39 @@ async def test_facet_against_in_memory_database():
assert response1.status_code == 200
response2 = await ds.client.get("/mem/t?_facet=name&_facet=name2")
assert response2.status_code == 200
@pytest.mark.asyncio
async def test_facet_only_considers_first_x_rows():
# This test works by manually fiddling with Facet.suggest_consider
ds = Datasette()
original_suggest_consider = Facet.suggest_consider
try:
Facet.suggest_consider = 40
db = ds.add_memory_database("test_facet_only_x_rows")
await db.execute_write("create table t (id integer primary key, col text)")
# First 50 rows make it look like col and col_json should be faceted
to_insert = [{"col": "one" if i % 2 else "two"} for i in range(50)]
await db.execute_write_many("insert into t (col) values (:col)", to_insert)
# Next 50 break that assumption
to_insert2 = [{"col": f"x{i}"} for i in range(50)]
await db.execute_write_many("insert into t (col) values (:col)", to_insert2)
response = await ds.client.get(
"/test_facet_only_x_rows/t.json?_extra=suggested_facets"
)
data = response.json()
assert data["suggested_facets"] == [
{
"name": "col",
"toggle_url": "http://localhost/test_facet_only_x_rows/t.json?_extra=suggested_facets&_facet=col",
}
]
# But if we set suggest_consider to 100 they are not suggested
Facet.suggest_consider = 100
response2 = await ds.client.get(
"/test_facet_only_x_rows/t.json?_extra=suggested_facets"
)
data2 = response2.json()
assert data2["suggested_facets"] == []
finally:
Facet.suggest_consider = original_suggest_consider