Revise Protocol.load shallow and refresh kwargs, rename to local and remote

and use in for_id to optimize datastore usage.
pull/553/head
Ryan Barrett 2023-06-18 07:29:54 -07:00
rodzic 146abbf718
commit 21ab9e34ed
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
3 zmienionych plików z 81 dodań i 48 usunięć

Wyświetl plik

@ -176,14 +176,13 @@ class Protocol:
if not id: if not id:
return None return None
# check for our per-protocol subdomains # step 1: check for our per-protocol subdomains
if util.is_web(id): if util.is_web(id):
by_domain = Protocol.for_domain(id) by_domain = Protocol.for_domain(id)
if by_domain: if by_domain:
return by_domain return by_domain
candidates = [] # step 2: check if any Protocols say conclusively that they own it
# sort to be deterministic # sort to be deterministic
protocols = sorted(set(p for p in PROTOCOLS.values() if p), protocols = sorted(set(p for p in PROTOCOLS.values() if p),
key=lambda p: p.__name__) key=lambda p: p.__name__)
@ -198,13 +197,18 @@ class Protocol:
if len(candidates) == 1: if len(candidates) == 1:
return candidates[0] return candidates[0]
# step 3: look for existing Objects in the datastore
obj = Protocol.load(id, remote=False)
if obj and obj.source_protocol:
logger.info(f'{obj.key} has source_protocol {obj.source_protocol}')
return PROTOCOLS[obj.source_protocol]
# step 4: fetch over the network
for protocol in candidates: for protocol in candidates:
logger.info(f'Trying {protocol.__name__}') logger.info(f'Trying {protocol.__name__}')
try: try:
obj = protocol.load(id) protocol.load(id, local=False, remote=True)
if obj.source_protocol: return protocol
logger.info(f"{obj.key} has source_protocol {obj.source_protocol}")
return PROTOCOLS[obj.source_protocol]
except werkzeug.exceptions.HTTPException: except werkzeug.exceptions.HTTPException:
# internal error we generated ourselves; try next protocol # internal error we generated ourselves; try next protocol
pass pass
@ -244,11 +248,9 @@ class Protocol:
@classmethod @classmethod
def fetch(cls, obj, **kwargs): def fetch(cls, obj, **kwargs):
"""Fetches a protocol-specific object and returns it in an :class:`Object`. """Fetches a protocol-specific object and populates it in an :class:`Object`.
To be implemented by subclasses. The returned :class:`Object` is loaded To be implemented by subclasses.
from the datastore, if it exists there, then updated in memory but not
yet written back to the datastore.
Args: Args:
obj: :class:`Object` with the id to fetch. Data is filled into one of obj: :class:`Object` with the id to fetch. Data is filled into one of
@ -595,55 +597,63 @@ class Protocol:
error(msg, status=int(errors[0][0] or 502)) error(msg, status=int(errors[0][0] or 502))
@classmethod @classmethod
def load(cls, id, refresh=False, shallow=True, **kwargs): def load(cls, id, remote=None, local=True, **kwargs):
"""Loads and returns an Object from memory cache, datastore, or HTTP fetch. """Loads and returns an Object from memory cache, datastore, or HTTP fetch.
Note that :meth:`Object._post_put_hook` updates the cache. Note that :meth:`Object._post_put_hook` updates the cache.
Args: Args:
id: str id: str
refresh: boolean, whether to fetch the object remotely even if we have
it stored remote: boolean, whether to fetch the object over the network. If True,
shallow: boolean, whether to only fetch from the datastore. If it fetches even if we already have the object stored, and updates our
isn't there, returns None instead of fetching over the network. stored copy. If False and we don't have the object stored, returns
None. Default (None) means to fetch over the network only if we
don't already have it stored.
local: boolean, whether to load from the datastore before
fetching over the network. If False, still stores back to the
datastore after a successful remote fetch.
kwargs: passed through to :meth:`fetch()` kwargs: passed through to :meth:`fetch()`
Returns: :class:`Object` or None if it isn't in the datastore and shallow Returns: :class:`Object` or None if it isn't in the datastore and remote
is True is False
Raises: Raises:
:class:`requests.HTTPError`, anything else that :meth:`fetch` raises :class:`requests.HTTPError`, anything else that :meth:`fetch` raises
""" """
assert not (refresh and shallow) assert local or remote is not False
if not refresh: logger.info(f'Loading Object {id} local={local} remote={remote}')
if remote is not True:
with objects_cache_lock: with objects_cache_lock:
cached = objects_cache.get(id) cached = objects_cache.get(id)
if cached: if cached:
return cached return cached
logger.info(f'Loading Object {id}') obj = orig_as1 = None
orig_as1 = None if local:
obj = Object.get_by_id(id) obj = Object.get_by_id(id)
if obj and (obj.as1 or obj.deleted): if obj and (obj.as1 or obj.deleted):
logger.info(' got from datastore') logger.info(' got from datastore')
obj.new = False obj.new = False
orig_as1 = obj.as1 orig_as1 = obj.as1
if not refresh: if remote is not True:
with objects_cache_lock: with objects_cache_lock:
objects_cache[id] = obj objects_cache[id] = obj
return obj return obj
if refresh: if remote is True:
logger.info(' forced refresh requested') logger.info(' remote=True, forced refresh requested')
if obj: if obj:
obj.clear() obj.clear()
obj.new = False obj.new = False
else: else:
logger.info(' not in datastore') if local:
if shallow: logger.info(' not in datastore')
logger.info(' shallow load requested, returning None') if remote is False:
logger.info(' remote=False; returning None')
return None return None
obj = Object(id=id) obj = Object(id=id)
obj.new = True obj.new = True

Wyświetl plik

@ -11,7 +11,9 @@ from .testutil import Fake, TestCase
from activitypub import ActivityPub from activitypub import ActivityPub
from app import app from app import app
from models import Follower, Object, PROTOCOLS, User from models import Follower, Object, PROTOCOLS, User
import protocol
from protocol import Protocol from protocol import Protocol
import requests
from ui import UIProtocol from ui import UIProtocol
from web import Web from web import Web
@ -189,52 +191,73 @@ class ProtocolTest(TestCase):
self.assertEqual([], Fake.fetched) self.assertEqual([], Fake.fetched)
def test_load_refresh_existing_empty(self): def test_load_remote_true_existing_empty(self):
Fake.objects['foo'] = {'x': 'y'} Fake.objects['foo'] = {'x': 'y'}
Object(id='foo').put() Object(id='foo').put()
loaded = Fake.load('foo', refresh=True) loaded = Fake.load('foo', remote=True)
self.assertEqual({'x': 'y'}, loaded.as1) self.assertEqual({'x': 'y'}, loaded.as1)
self.assertTrue(loaded.changed) self.assertTrue(loaded.changed)
self.assertFalse(loaded.new) self.assertFalse(loaded.new)
self.assertEqual(['foo'], Fake.fetched) self.assertEqual(['foo'], Fake.fetched)
def test_load_refresh_new_empty(self): def test_load_remote_true_new_empty(self):
Fake.objects['foo'] = None Fake.objects['foo'] = None
Object(id='foo', our_as1={'x': 'y'}).put() Object(id='foo', our_as1={'x': 'y'}).put()
loaded = Fake.load('foo', refresh=True) loaded = Fake.load('foo', remote=True)
self.assertIsNone(loaded.as1) self.assertIsNone(loaded.as1)
self.assertTrue(loaded.changed) self.assertTrue(loaded.changed)
self.assertFalse(loaded.new) self.assertFalse(loaded.new)
self.assertEqual(['foo'], Fake.fetched) self.assertEqual(['foo'], Fake.fetched)
def test_load_refresh_unchanged(self): def test_load_remote_true_unchanged(self):
obj = Object(id='foo', our_as1={'x': 'stored'}) obj = Object(id='foo', our_as1={'x': 'stored'})
obj.put() obj.put()
Fake.objects['foo'] = {'x': 'stored'} Fake.objects['foo'] = {'x': 'stored'}
loaded = Fake.load('foo', refresh=True) loaded = Fake.load('foo', remote=True)
self.assert_entities_equal(obj, loaded) self.assert_entities_equal(obj, loaded)
self.assertFalse(obj.changed) self.assertFalse(obj.changed)
self.assertFalse(obj.new) self.assertFalse(obj.new)
self.assertEqual(['foo'], Fake.fetched) self.assertEqual(['foo'], Fake.fetched)
def test_load_refresh_changed(self): def test_load_remote_true_changed(self):
Object(id='foo', our_as1={'content': 'stored'}).put() Object(id='foo', our_as1={'content': 'stored'}).put()
Fake.objects['foo'] = {'content': 'new'} Fake.objects['foo'] = {'content': 'new'}
loaded = Fake.load('foo', refresh=True) loaded = Fake.load('foo', remote=True)
self.assert_equals({'content': 'new'}, loaded.our_as1) self.assert_equals({'content': 'new'}, loaded.our_as1)
self.assertTrue(loaded.changed) self.assertTrue(loaded.changed)
self.assertFalse(loaded.new) self.assertFalse(loaded.new)
self.assertEqual(['foo'], Fake.fetched) self.assertEqual(['foo'], Fake.fetched)
def test_load_shallow_missing(self): def test_load_remote_false(self):
self.assertIsNone(Fake.load('nope', shallow=True)) self.assertIsNone(Fake.load('nope', remote=False))
self.assertEqual([], Fake.fetched) self.assertEqual([], Fake.fetched)
obj = Object(id='foo', our_as1={'content': 'stored'}) obj = Object(id='foo', our_as1={'content': 'stored'})
obj.put() obj.put()
self.assert_entities_equal(obj, Fake.load('foo', shallow=True)) self.assert_entities_equal(obj, Fake.load('foo', remote=False))
self.assertEqual([], Fake.fetched) self.assertEqual([], Fake.fetched)
def test_local_false_missing(self):
    """load(local=False) goes to the network; a failed fetch propagates.

    Nothing is registered in Fake.objects for 'foo' in this test, so the
    fetch is expected to fail with an HTTP 410.
    """
    with self.assertRaises(requests.HTTPError) as caught:
        Fake.load('foo', local=False)

    # The assertRaises context manager exposes the raised exception as
    # .exception, not on the context object itself, and checks placed inside
    # the with block after the raising call never run. Assert out here.
    # NOTE(review): int() because this view doesn't show whether the fake's
    # response carries status_code as an int or a str -- confirm and simplify.
    self.assertEqual(410, int(caught.exception.response.status_code))
    self.assertEqual(['foo'], Fake.fetched)
def test_local_false_existing(self):
    """load(local=False) refetches and overwrites an already stored Object."""
    # Seed the datastore with a copy attributed to a different protocol.
    Object(id='foo', our_as1={'content': 'stored'}, source_protocol='ui').put()
    # Storing the Object also populates the in-memory cache; drop that entry
    # so only the datastore copy remains.
    del protocol.objects_cache['foo']

    Fake.objects['foo'] = {'foo': 'bar'}
    Fake.load('foo', local=False)

    self.assert_object('foo', source_protocol='fake', our_as1={'foo': 'bar'})
    self.assertEqual(['foo'], Fake.fetched)
def test_remote_false_local_false_assert(self):
    """Disabling both local and remote leaves nothing to load from; load asserts."""
    self.assertRaises(AssertionError, Fake.load, 'nope',
                      local=False, remote=False)

2
web.py
Wyświetl plik

@ -494,7 +494,7 @@ def webmention_task():
# fetch source page # fetch source page
try: try:
obj = Web.load(source, refresh=True, check_backlink=True) obj = Web.load(source, remote=True, check_backlink=True)
except BadRequest as e: except BadRequest as e:
error(str(e.description), status=304) error(str(e.description), status=304)
except HTTPError as e: except HTTPError as e: