kopia lustrzana https://github.com/snarfed/bridgy-fed
Revise Protocol.load's shallow and refresh kwargs: rename them to local and remote,
and use them in for_id to optimize datastore usage.
pull/553/head
rodzic
146abbf718
commit
21ab9e34ed
82
protocol.py
82
protocol.py
|
@ -176,14 +176,13 @@ class Protocol:
|
||||||
if not id:
|
if not id:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# check for our per-protocol subdomains
|
# step 1: check for our per-protocol subdomains
|
||||||
if util.is_web(id):
|
if util.is_web(id):
|
||||||
by_domain = Protocol.for_domain(id)
|
by_domain = Protocol.for_domain(id)
|
||||||
if by_domain:
|
if by_domain:
|
||||||
return by_domain
|
return by_domain
|
||||||
|
|
||||||
candidates = []
|
# step 2: check if any Protocols say conclusively that they own it
|
||||||
|
|
||||||
# sort to be deterministic
|
# sort to be deterministic
|
||||||
protocols = sorted(set(p for p in PROTOCOLS.values() if p),
|
protocols = sorted(set(p for p in PROTOCOLS.values() if p),
|
||||||
key=lambda p: p.__name__)
|
key=lambda p: p.__name__)
|
||||||
|
@ -198,13 +197,18 @@ class Protocol:
|
||||||
if len(candidates) == 1:
|
if len(candidates) == 1:
|
||||||
return candidates[0]
|
return candidates[0]
|
||||||
|
|
||||||
|
# step 3: look for existing Objects in the datastore
|
||||||
|
obj = Protocol.load(id, remote=False)
|
||||||
|
if obj and obj.source_protocol:
|
||||||
|
logger.info(f'{obj.key} has source_protocol {obj.source_protocol}')
|
||||||
|
return PROTOCOLS[obj.source_protocol]
|
||||||
|
|
||||||
|
# step 4: fetch over the network
|
||||||
for protocol in candidates:
|
for protocol in candidates:
|
||||||
logger.info(f'Trying {protocol.__name__}')
|
logger.info(f'Trying {protocol.__name__}')
|
||||||
try:
|
try:
|
||||||
obj = protocol.load(id)
|
protocol.load(id, local=False, remote=True)
|
||||||
if obj.source_protocol:
|
return protocol
|
||||||
logger.info(f"{obj.key} has source_protocol {obj.source_protocol}")
|
|
||||||
return PROTOCOLS[obj.source_protocol]
|
|
||||||
except werkzeug.exceptions.HTTPException:
|
except werkzeug.exceptions.HTTPException:
|
||||||
# internal error we generated ourselves; try next protocol
|
# internal error we generated ourselves; try next protocol
|
||||||
pass
|
pass
|
||||||
|
@ -244,11 +248,9 @@ class Protocol:
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def fetch(cls, obj, **kwargs):
|
def fetch(cls, obj, **kwargs):
|
||||||
"""Fetches a protocol-specific object and returns it in an :class:`Object`.
|
"""Fetches a protocol-specific object and populates it in an :class:`Object`.
|
||||||
|
|
||||||
To be implemented by subclasses. The returned :class:`Object` is loaded
|
To be implemented by subclasses.
|
||||||
from the datastore, if it exists there, then updated in memory but not
|
|
||||||
yet written back to the datastore.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
obj: :class:`Object` with the id to fetch. Data is filled into one of
|
obj: :class:`Object` with the id to fetch. Data is filled into one of
|
||||||
|
@ -595,55 +597,63 @@ class Protocol:
|
||||||
error(msg, status=int(errors[0][0] or 502))
|
error(msg, status=int(errors[0][0] or 502))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls, id, refresh=False, shallow=True, **kwargs):
|
def load(cls, id, remote=None, local=True, **kwargs):
|
||||||
"""Loads and returns an Object from memory cache, datastore, or HTTP fetch.
|
"""Loads and returns an Object from memory cache, datastore, or HTTP fetch.
|
||||||
|
|
||||||
Note that :meth:`Object._post_put_hook` updates the cache.
|
Note that :meth:`Object._post_put_hook` updates the cache.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
id: str
|
id: str
|
||||||
refresh: boolean, whether to fetch the object remotely even if we have
|
|
||||||
it stored
|
remote: boolean, whether to fetch the object over the network. If True,
|
||||||
shallow: boolean, whether to only fetch from the datastore. If it
|
fetches even if we already have the object stored, and updates our
|
||||||
isn't there, returns None instead of fetching over the network.
|
stored copy. If False and we don't have the object stored, returns
|
||||||
|
None. Default (None) means to fetch over the network only if we
|
||||||
|
don't already have it stored.
|
||||||
|
local: boolean, whether to load from the datastore before
|
||||||
|
fetching over the network. If False, still stores back to the
|
||||||
|
datastore after a successful remote fetch.
|
||||||
kwargs: passed through to :meth:`fetch()`
|
kwargs: passed through to :meth:`fetch()`
|
||||||
|
|
||||||
Returns: :class:`Object` or None if it isn't in the datastore and shallow
|
Returns: :class:`Object` or None if it isn't in the datastore and remote
|
||||||
is True
|
is False
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
:class:`requests.HTTPError`, anything else that :meth:`fetch` raises
|
:class:`requests.HTTPError`, anything else that :meth:`fetch` raises
|
||||||
"""
|
"""
|
||||||
assert not (refresh and shallow)
|
assert local or remote is not False
|
||||||
|
|
||||||
if not refresh:
|
logger.info(f'Loading Object {id} local={local} remote={remote}')
|
||||||
|
|
||||||
|
if remote is not True:
|
||||||
with objects_cache_lock:
|
with objects_cache_lock:
|
||||||
cached = objects_cache.get(id)
|
cached = objects_cache.get(id)
|
||||||
if cached:
|
if cached:
|
||||||
return cached
|
return cached
|
||||||
|
|
||||||
logger.info(f'Loading Object {id}')
|
obj = orig_as1 = None
|
||||||
orig_as1 = None
|
if local:
|
||||||
obj = Object.get_by_id(id)
|
obj = Object.get_by_id(id)
|
||||||
if obj and (obj.as1 or obj.deleted):
|
if obj and (obj.as1 or obj.deleted):
|
||||||
logger.info(' got from datastore')
|
logger.info(' got from datastore')
|
||||||
obj.new = False
|
obj.new = False
|
||||||
orig_as1 = obj.as1
|
orig_as1 = obj.as1
|
||||||
if not refresh:
|
if remote is not True:
|
||||||
with objects_cache_lock:
|
with objects_cache_lock:
|
||||||
objects_cache[id] = obj
|
objects_cache[id] = obj
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
if refresh:
|
if remote is True:
|
||||||
logger.info(' forced refresh requested')
|
logger.info(' remote=True, forced refresh requested')
|
||||||
|
|
||||||
if obj:
|
if obj:
|
||||||
obj.clear()
|
obj.clear()
|
||||||
obj.new = False
|
obj.new = False
|
||||||
else:
|
else:
|
||||||
logger.info(' not in datastore')
|
if local:
|
||||||
if shallow:
|
logger.info(' not in datastore')
|
||||||
logger.info(' shallow load requested, returning None')
|
if remote is False:
|
||||||
|
logger.info(' remote=False; returning None')
|
||||||
return None
|
return None
|
||||||
obj = Object(id=id)
|
obj = Object(id=id)
|
||||||
obj.new = True
|
obj.new = True
|
||||||
|
|
|
@ -11,7 +11,9 @@ from .testutil import Fake, TestCase
|
||||||
from activitypub import ActivityPub
|
from activitypub import ActivityPub
|
||||||
from app import app
|
from app import app
|
||||||
from models import Follower, Object, PROTOCOLS, User
|
from models import Follower, Object, PROTOCOLS, User
|
||||||
|
import protocol
|
||||||
from protocol import Protocol
|
from protocol import Protocol
|
||||||
|
import requests
|
||||||
from ui import UIProtocol
|
from ui import UIProtocol
|
||||||
from web import Web
|
from web import Web
|
||||||
|
|
||||||
|
@ -189,52 +191,73 @@ class ProtocolTest(TestCase):
|
||||||
|
|
||||||
self.assertEqual([], Fake.fetched)
|
self.assertEqual([], Fake.fetched)
|
||||||
|
|
||||||
def test_load_refresh_existing_empty(self):
|
def test_load_remote_true_existing_empty(self):
|
||||||
Fake.objects['foo'] = {'x': 'y'}
|
Fake.objects['foo'] = {'x': 'y'}
|
||||||
Object(id='foo').put()
|
Object(id='foo').put()
|
||||||
|
|
||||||
loaded = Fake.load('foo', refresh=True)
|
loaded = Fake.load('foo', remote=True)
|
||||||
self.assertEqual({'x': 'y'}, loaded.as1)
|
self.assertEqual({'x': 'y'}, loaded.as1)
|
||||||
self.assertTrue(loaded.changed)
|
self.assertTrue(loaded.changed)
|
||||||
self.assertFalse(loaded.new)
|
self.assertFalse(loaded.new)
|
||||||
self.assertEqual(['foo'], Fake.fetched)
|
self.assertEqual(['foo'], Fake.fetched)
|
||||||
|
|
||||||
def test_load_refresh_new_empty(self):
|
def test_load_remote_true_new_empty(self):
|
||||||
Fake.objects['foo'] = None
|
Fake.objects['foo'] = None
|
||||||
Object(id='foo', our_as1={'x': 'y'}).put()
|
Object(id='foo', our_as1={'x': 'y'}).put()
|
||||||
|
|
||||||
loaded = Fake.load('foo', refresh=True)
|
loaded = Fake.load('foo', remote=True)
|
||||||
self.assertIsNone(loaded.as1)
|
self.assertIsNone(loaded.as1)
|
||||||
self.assertTrue(loaded.changed)
|
self.assertTrue(loaded.changed)
|
||||||
self.assertFalse(loaded.new)
|
self.assertFalse(loaded.new)
|
||||||
self.assertEqual(['foo'], Fake.fetched)
|
self.assertEqual(['foo'], Fake.fetched)
|
||||||
|
|
||||||
def test_load_refresh_unchanged(self):
|
def test_load_remote_true_unchanged(self):
|
||||||
obj = Object(id='foo', our_as1={'x': 'stored'})
|
obj = Object(id='foo', our_as1={'x': 'stored'})
|
||||||
obj.put()
|
obj.put()
|
||||||
Fake.objects['foo'] = {'x': 'stored'}
|
Fake.objects['foo'] = {'x': 'stored'}
|
||||||
|
|
||||||
loaded = Fake.load('foo', refresh=True)
|
loaded = Fake.load('foo', remote=True)
|
||||||
self.assert_entities_equal(obj, loaded)
|
self.assert_entities_equal(obj, loaded)
|
||||||
self.assertFalse(obj.changed)
|
self.assertFalse(obj.changed)
|
||||||
self.assertFalse(obj.new)
|
self.assertFalse(obj.new)
|
||||||
self.assertEqual(['foo'], Fake.fetched)
|
self.assertEqual(['foo'], Fake.fetched)
|
||||||
|
|
||||||
def test_load_refresh_changed(self):
|
def test_load_remote_true_changed(self):
|
||||||
Object(id='foo', our_as1={'content': 'stored'}).put()
|
Object(id='foo', our_as1={'content': 'stored'}).put()
|
||||||
Fake.objects['foo'] = {'content': 'new'}
|
Fake.objects['foo'] = {'content': 'new'}
|
||||||
|
|
||||||
loaded = Fake.load('foo', refresh=True)
|
loaded = Fake.load('foo', remote=True)
|
||||||
self.assert_equals({'content': 'new'}, loaded.our_as1)
|
self.assert_equals({'content': 'new'}, loaded.our_as1)
|
||||||
self.assertTrue(loaded.changed)
|
self.assertTrue(loaded.changed)
|
||||||
self.assertFalse(loaded.new)
|
self.assertFalse(loaded.new)
|
||||||
self.assertEqual(['foo'], Fake.fetched)
|
self.assertEqual(['foo'], Fake.fetched)
|
||||||
|
|
||||||
def test_load_shallow_missing(self):
|
def test_load_remote_false(self):
|
||||||
self.assertIsNone(Fake.load('nope', shallow=True))
|
self.assertIsNone(Fake.load('nope', remote=False))
|
||||||
self.assertEqual([], Fake.fetched)
|
self.assertEqual([], Fake.fetched)
|
||||||
|
|
||||||
obj = Object(id='foo', our_as1={'content': 'stored'})
|
obj = Object(id='foo', our_as1={'content': 'stored'})
|
||||||
obj.put()
|
obj.put()
|
||||||
self.assert_entities_equal(obj, Fake.load('foo', shallow=True))
|
self.assert_entities_equal(obj, Fake.load('foo', remote=False))
|
||||||
self.assertEqual([], Fake.fetched)
|
self.assertEqual([], Fake.fetched)
|
||||||
|
|
||||||
|
def test_local_false_missing(self):
|
||||||
|
with self.assertRaises(requests.HTTPError) as e:
|
||||||
|
Fake.load('foo', local=False)
|
||||||
|
self.assertEqual(410, e.response.status_code)
|
||||||
|
|
||||||
|
self.assertEqual(['foo'], Fake.fetched)
|
||||||
|
|
||||||
|
def test_local_false_existing(self):
|
||||||
|
obj = Object(id='foo', our_as1={'content': 'stored'}, source_protocol='ui')
|
||||||
|
obj.put()
|
||||||
|
del protocol.objects_cache['foo']
|
||||||
|
|
||||||
|
Fake.objects['foo'] = {'foo': 'bar'}
|
||||||
|
Fake.load('foo', local=False)
|
||||||
|
self.assert_object('foo', source_protocol='fake', our_as1={'foo': 'bar'})
|
||||||
|
self.assertEqual(['foo'], Fake.fetched)
|
||||||
|
|
||||||
|
def test_remote_false_local_false_assert(self):
|
||||||
|
with self.assertRaises(AssertionError):
|
||||||
|
Fake.load('nope', local=False, remote=False)
|
||||||
|
|
2
web.py
2
web.py
|
@ -494,7 +494,7 @@ def webmention_task():
|
||||||
|
|
||||||
# fetch source page
|
# fetch source page
|
||||||
try:
|
try:
|
||||||
obj = Web.load(source, refresh=True, check_backlink=True)
|
obj = Web.load(source, remote=True, check_backlink=True)
|
||||||
except BadRequest as e:
|
except BadRequest as e:
|
||||||
error(str(e.description), status=304)
|
error(str(e.description), status=304)
|
||||||
except HTTPError as e:
|
except HTTPError as e:
|
||||||
|
|
Ładowanie…
Reference in New Issue