unify Object.new/changed generation into Protocol.load

also:
* switch back to preserving fragments in URL ids
* webmention.fetch: if URL id redirects, preserve original id in Object
pull/475/head
Ryan Barrett 2023-04-03 07:53:15 -07:00
rodzic 3a97ba587d
commit 629c1a2bd4
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
9 zmienionych plików z 144 dodań i 159 usunięć

Wyświetl plik

@ -63,7 +63,7 @@ class ActivityPub(Protocol):
# TODO: return bool or otherwise unify return value with others
@classmethod
def fetch(cls, id):
def fetch(cls, obj):
"""Tries to fetch an AS2 object.
Uses HTTP content negotiation via the Content-Type header. If the url is
@ -82,10 +82,8 @@ class ActivityPub(Protocol):
using @snarfed.org@snarfed.org's key.
Args:
id: str, object's URL id
Returns:
obj: :class:`Object` with the fetched object
obj: :class:`Object` with the id to fetch. Fills data into the as2
property.
Raises:
:class:`requests.HTTPError`, :class:`werkzeug.exceptions.HTTPException`
@ -96,7 +94,7 @@ class ActivityPub(Protocol):
resp = None
def _error(extra_msg=None):
msg = f"Couldn't fetch {id} as ActivityStreams 2"
msg = f"Couldn't fetch {obj.key.id()} as ActivityStreams 2"
if extra_msg:
msg += ': ' + extra_msg
logger.warning(msg)
@ -112,23 +110,12 @@ class ActivityPub(Protocol):
_error('empty response')
elif common.content_type(resp) == as2.CONTENT_TYPE:
try:
as2_json = resp.json()
return resp.json()
except requests.JSONDecodeError:
_error("Couldn't decode as JSON")
obj = Object.get_by_id(id)
if obj:
orig_as1 = obj.as1
obj.clear()
obj.as2 = as2_json
obj.changed = as1.activity_changed(orig_as1, obj.as1)
obj.new = False
else:
obj = Object(id=id, as2=as2_json)
obj.new = True
return obj
obj = _get(id, CONNEG_HEADERS_AS2_HTML)
if obj:
obj.as2 = _get(obj.key.id(), CONNEG_HEADERS_AS2_HTML)
if obj.as2:
return obj
# look in HTML to find AS2 link
@ -140,8 +127,8 @@ class ActivityPub(Protocol):
if not (link and link['href']):
_error('no AS2 available')
obj = _get(link['href'], as2.CONNEG_HEADERS)
if obj:
obj.as2 = _get(link['href'], as2.CONNEG_HEADERS)
if obj.as2:
return obj
_error()
@ -160,7 +147,7 @@ class ActivityPub(Protocol):
if not sig:
error('No HTTP Signature', status=401)
logging.info('Verifying HTTP Signature')
logger.info('Verifying HTTP Signature')
logger.info(f'Headers: {json_dumps(dict(request.headers), indent=2)}')
# parse_signature_header lower-cases all keys
@ -182,7 +169,7 @@ class ActivityPub(Protocol):
obj_id = as1.get_object(activity).get('id')
if (activity.get('type') == 'Delete' and obj_id and
keyId == fragmentless(obj_id)):
logging.info('Object/actor being deleted is also keyId')
logger.info('Object/actor being deleted is also keyId')
key_actor = Object(id=keyId, source_protocol='activitypub', deleted=True)
key_actor.put()
else:

Wyświetl plik

@ -130,7 +130,7 @@ class FollowCallback(indieauth.Callback):
return
me = auth_entity.key.id()
logging.info(f'Storing indieauthed-me: {me} in session cookie')
logger.info(f'Storing indieauthed-me: {me} in session cookie')
session['indieauthed-me'] = me
domain = util.domain_from_link(me)

Wyświetl plik

@ -308,7 +308,7 @@ class Object(StringIdModel):
# assert (self.as2 is not None) ^ (self.bsky is not None) ^ (self.mf2 is not None), \
# f'{self.as2} {self.bsky} {self.mf2}'
if bool(self.as2) + bool(self.bsky) + bool(self.mf2) > 1:
logging.warning(f'{self.key} has multiple! {bool(self.as2)} {bool(self.bsky)} {bool(self.mf2)}')
logger.warning(f'{self.key} has multiple! {bool(self.as2)} {bool(self.bsky)} {bool(self.mf2)}')
if self.our_as1 is not None:
return common.redirect_unwrap(self.our_as1)
@ -359,7 +359,7 @@ class Object(StringIdModel):
for prop in 'as2', 'bsky', 'mf2':
val = getattr(self, prop, None)
if val:
logging.warning(f'Wiping out {prop}: {json_dumps(val, indent=2)}')
logger.warning(f'Wiping out {prop}: {json_dumps(val, indent=2)}')
setattr(self, prop, None)
def proxy_url(self):

Wyświetl plik

@ -82,7 +82,7 @@ class Protocol:
raise NotImplementedError()
@classmethod
def fetch(cls, id):
def fetch(cls, obj):
"""Fetches a protocol-specific object and returns it in an :class:`Object`.
To be implemented by subclasses. The returned :class:`Object` is loaded
@ -90,10 +90,8 @@ class Protocol:
yet written back to the datastore.
Args:
id: str, object's URL id
Returns:
obj: :class:`Object` with the fetched object
obj: :class:`Object` with the id to fetch. Data is filled into one of
the protocol-specific properties, eg as2, mf2, bsky.
Raises:
:class:`werkzeug.HTTPException` if the fetch fails
@ -131,7 +129,7 @@ class Protocol:
obj.populate(source_protocol=cls.LABEL, **props)
obj.put()
logging.info(f'Got AS1: {json_dumps(obj.as1, indent=2)}')
logger.info(f'Got AS1: {json_dumps(obj.as1, indent=2)}')
if obj.type not in SUPPORTED_TYPES:
error(f'Sorry, {obj.type} activities are not supported yet.', status=501)
@ -161,7 +159,7 @@ class Protocol:
follower = models.Follower.get_by_id(
models.Follower._id(dest=followee_domain, src=actor_id))
if follower:
logging.info(f'Marking {follower} inactive')
logger.info(f'Marking {follower} inactive')
follower.status = 'inactive'
follower.put()
else:
@ -362,7 +360,7 @@ class Protocol:
error(msg, status=int(errors[0][0] or 502))
@classmethod
def load(cls, id):
def load(cls, id, refresh=False):
"""Loads and returns an Object from memory cache, datastore, or HTTP fetch.
Assumes id is a URL. Any fragment at the end is stripped before loading.
@ -379,29 +377,48 @@ class Protocol:
Args:
id: str
refresh: boolean, whether to fetch the object remotely even if we have
it stored
Returns: :class:`Object`
Raises:
:class:`requests.HTTPError`, anything else that :meth:`fetch` raises
"""
id = util.fragmentless(id)
with objects_cache_lock:
cached = objects_cache.get(id)
if cached:
return cached
if not refresh:
with objects_cache_lock:
cached = objects_cache.get(id)
if cached:
return cached
logger.info(f'Loading Object {id}')
orig_as1 = None
obj = models.Object.get_by_id(id)
if obj and (obj.as1 or obj.deleted):
logger.info(' got from datastore')
with objects_cache_lock:
objects_cache[id] = obj
return obj
obj.new = False
orig_as1 = obj.as1
if not refresh:
with objects_cache_lock:
objects_cache[id] = obj
return obj
if refresh:
logger.info(' forced refresh requested')
if obj:
obj.clear()
else:
logger.info(f' not in datastore')
obj = models.Object(id=id)
obj.new = True
obj.changed = False
cls.fetch(obj)
if orig_as1:
obj.new = False
obj.changed = as1.activity_changed(orig_as1, obj.as1)
logger.info(f'Object not in datastore or has no data: {id}')
obj = cls.fetch(id)
obj.source_protocol = cls.LABEL
obj.put()

Wyświetl plik

@ -1297,12 +1297,12 @@ class ActivityPubUtilsTest(testutil.TestCase):
mock_get.assert_not_called()
@patch('requests.get')
def test_load_strips_fragment(self, mock_get):
stored = Object(id='http://the/id', as2=AS2_OBJ)
def test_load_preserves_fragment(self, mock_get):
stored = Object(id='http://the/id#frag', as2=AS2_OBJ)
stored.put()
protocol.objects_cache.clear()
got = ActivityPub.load('http://the/id#ignore')
got = ActivityPub.load('http://the/id#frag')
self.assert_entities_equal(stored, got)
mock_get.assert_not_called()
@ -1358,11 +1358,10 @@ class ActivityPubUtilsTest(testutil.TestCase):
@patch('requests.get')
def test_fetch_direct(self, mock_get):
mock_get.return_value = AS2
got = ActivityPub.fetch('http://orig')
self.assertEqual(AS2_OBJ, got.as2)
obj = Object(id='http://orig')
ActivityPub.fetch(obj)
self.assertEqual(AS2_OBJ, obj.as2)
self.assertTrue(got.new)
self.assertFalse(got.changed)
mock_get.assert_has_calls((
self.as2_req('http://orig'),
))
@ -1370,11 +1369,10 @@ class ActivityPubUtilsTest(testutil.TestCase):
@patch('requests.get')
def test_fetch_via_html(self, mock_get):
mock_get.side_effect = [HTML_WITH_AS2, AS2]
got = ActivityPub.fetch('http://orig')
self.assertEqual(AS2_OBJ, got.as2)
obj = Object(id='http://orig')
ActivityPub.fetch(obj)
self.assertEqual(AS2_OBJ, obj.as2)
self.assertTrue(got.new)
self.assertFalse(got.changed)
mock_get.assert_has_calls((
self.as2_req('http://orig'),
self.as2_req('http://as2', headers=common.as2.CONNEG_HEADERS),
@ -1384,26 +1382,26 @@ class ActivityPubUtilsTest(testutil.TestCase):
def test_fetch_only_html(self, mock_get):
mock_get.return_value = HTML
with self.assertRaises(BadGateway):
ActivityPub.fetch('http://orig')
ActivityPub.fetch(Object(id='http://orig'))
@patch('requests.get')
def test_fetch_not_acceptable(self, mock_get):
mock_get.return_value=NOT_ACCEPTABLE
with self.assertRaises(BadGateway):
ActivityPub.fetch('http://orig')
ActivityPub.fetch(Object(id='http://orig'))
@patch('requests.get')
def test_fetch_ssl_error(self, mock_get):
mock_get.side_effect = requests.exceptions.SSLError
with self.assertRaises(BadGateway):
ActivityPub.fetch('http://orig')
ActivityPub.fetch(Object(id='http://orig'))
@patch('requests.get')
def test_fetch_no_content(self, mock_get):
mock_get.return_value = self.as2_resp('')
with self.assertRaises(BadGateway):
got = ActivityPub.fetch('http://the/id')
ActivityPub.fetch(Object(id='http://the/id'))
mock_get.assert_has_calls([self.as2_req('http://the/id')])
@ -1412,39 +1410,10 @@ class ActivityPubUtilsTest(testutil.TestCase):
mock_get.return_value = self.as2_resp('XYZ not JSON')
with self.assertRaises(BadGateway):
got = ActivityPub.fetch('http://the/id')
ActivityPub.fetch(Object(id='http://the/id'))
mock_get.assert_has_calls([self.as2_req('http://the/id')])
@patch('requests.get')
def test_fetch_content_changed(self, mock_get):
Object(id='http://orig', as2={
**NOTE_OBJECT,
'content': 'something else',
}).put()
mock_get.return_value = self.as2_resp(NOTE_OBJECT)
obj = ActivityPub.fetch('http://orig')
self.assert_equals(NOTE_OBJECT, obj.as2)
self.assertFalse(obj.new)
self.assertTrue(obj.changed)
mock_get.assert_has_calls((
self.as2_req('http://orig'),
))
@patch('requests.get')
def test_fetch_content_unchanged(self, mock_get):
Object(id='http://orig', as2=NOTE_OBJECT).put()
mock_get.return_value = self.as2_resp(NOTE_OBJECT)
obj = ActivityPub.fetch('http://orig')
self.assert_equals(NOTE_OBJECT, obj.as2)
self.assertFalse(obj.new)
self.assertFalse(obj.changed)
mock_get.assert_has_calls((
self.as2_req('http://orig'),
))
def test_postprocess_as2_idempotent(self):
g.user = self.make_user('foo.com')

Wyświetl plik

@ -60,16 +60,25 @@ class ProtocolTest(testutil.TestCase):
)
def test_load(self):
obj = Object(id='foo', our_as1={'x': 'y'})
FakeProtocol.objects = {'foo': obj}
self.assert_entities_equal(obj, FakeProtocol.load('foo'))
FakeProtocol.objects['foo'] = {'x': 'y'}
loaded = FakeProtocol.load('foo')
self.assert_equals({'x': 'y'}, loaded.our_as1)
self.assertFalse(loaded.changed)
self.assertTrue(loaded.new)
self.assertIsNotNone(Object.get_by_id('foo'))
self.assertEqual(['foo'], FakeProtocol.fetched)
def test_load_already_stored(self):
stored = Object(id='foo', our_as1={'x': 'y'})
stored.put()
self.assert_entities_equal(stored, FakeProtocol.load('foo'))
loaded = FakeProtocol.load('foo')
self.assert_equals({'x': 'y'}, loaded.our_as1)
self.assertFalse(loaded.changed)
self.assertFalse(loaded.new)
self.assertEqual([], FakeProtocol.fetched)
@patch('requests.get')
@ -77,5 +86,32 @@ class ProtocolTest(testutil.TestCase):
stored = Object(id='foo', deleted=True)
stored.put()
self.assert_entities_equal(stored, FakeProtocol.load('foo'))
mock_get.assert_not_called()
loaded = FakeProtocol.load('foo')
self.assert_entities_equal(stored, loaded)
self.assertFalse(loaded.changed)
self.assertFalse(loaded.new)
self.assertEqual([], FakeProtocol.fetched)
@patch('requests.get')
def test_load_refresh_unchanged(self, mock_get):
obj = Object(id='foo', our_as1={'x': 'stored'})
obj.put()
FakeProtocol.objects['foo'] = {'x': 'stored'}
loaded = FakeProtocol.load('foo', refresh=True)
self.assert_entities_equal(obj, loaded)
self.assertFalse(obj.changed)
self.assertFalse(obj.new)
self.assertEqual(['foo'], FakeProtocol.fetched)
@patch('requests.get')
def test_load_refresh_changed(self, mock_get):
Object(id='foo', our_as1={'content': 'stored'}).put()
FakeProtocol.objects['foo'] = {'content': 'new'}
loaded = FakeProtocol.load('foo', refresh=True)
self.assert_equals({'content': 'new'}, loaded.our_as1)
self.assertTrue(loaded.changed)
self.assertFalse(loaded.new)
self.assertEqual(['foo'], FakeProtocol.fetched)

Wyświetl plik

@ -277,7 +277,7 @@ class WebmentionTest(testutil.TestCase):
</html>
"""
self.follow_fragment = requests_response(
self.follow_fragment_html, url='https://user.com/follow#2',
self.follow_fragment_html, url='https://user.com/follow',
content_type=CONTENT_TYPE_HTML)
self.follow_fragment_mf2 = \
util.parse_mf2(self.follow_fragment_html, id='2')['items'][0]
@ -361,36 +361,34 @@ class WebmentionTest(testutil.TestCase):
def test_fetch(self, mock_get, mock_post):
mock_get.return_value = self.reply
obj = Object(id='https://user.com/post')
with app.test_request_context('/'):
obj = Webmention.fetch('https://user.com/post')
Webmention.fetch(obj)
self.assert_equals(self.reply_as1, obj.as1)
self.assertFalse(obj.changed)
self.assertTrue(obj.new)
def test_fetch_redirect(self, mock_get, mock_post):
mock_get.return_value = requests_response(
REPOST_HTML, url='https://orig/url', redirected_url='http://new/url')
obj = Object(id='https://orig/url')
with app.test_request_context('/'):
obj = Webmention.fetch('https://orig/url')
Webmention.fetch(obj)
self.assert_equals({**self.repost_mf2, 'url': 'http://new/url'}, obj.mf2)
self.assert_equals(self.repost_as1, obj.as1)
self.assertFalse(obj.changed)
self.assertTrue(obj.new)
self.assertIsNone(Object.get_by_id('http://orig/url'))
self.assertIsNone(Object.get_by_id('http://new/url'))
# TODO
@skip
def test_fetch_bad_source_url(self, mock_get, mock_post):
with app.test_request_context('/'), self.assertRaises(ValueError):
Webmention.fetch('bad')
Webmention.fetch(Object(id='bad'))
def test_fetch_error(self, mock_get, mock_post):
mock_get.return_value = requests_response(self.reply_html, status=405)
with app.test_request_context('/'), self.assertRaises(BadGateway) as e:
Webmention.fetch('https://foo')
Webmention.fetch(Object(id='https://foo'))
def test_fetch_run_authorship(self, mock_get, mock_post):
mock_get.side_effect = [
@ -405,34 +403,11 @@ class WebmentionTest(testutil.TestCase):
]
return_value = self.reply
obj = Object(id='https://user.com/reply')
with app.test_request_context('/'):
obj = Webmention.fetch('https://user.com/reply')
Webmention.fetch(obj)
self.assert_equals(self.reply_as1, obj.as1)
def test_fetch_content_changed(self, mock_get, mock_post):
orig_mf2 = copy.deepcopy(self.note_mf2)
orig_mf2['properties']['content'] = ['something else']
Object(id='https://user.com/post', mf2=orig_mf2).put()
mock_get.return_value = self.note
with app.test_request_context('/'):
obj = Webmention.fetch('https://user.com/post')
self.assert_equals({**self.note_mf2, 'url': 'https://user.com/post'}, obj.mf2)
self.assert_equals(self.note_as1, obj.as1)
self.assertTrue(obj.changed)
self.assertFalse(obj.new)
def test_fetch_content_unchanged(self, mock_get, mock_post):
Object(id='https://user.com/post', mf2=self.note_mf2).put()
mock_get.return_value = self.note
with app.test_request_context('/'):
obj = Webmention.fetch('https://user.com/post')
self.assertFalse(obj.changed)
self.assertFalse(obj.new)
def test_send(self, mock_get, mock_post):
mock_get.return_value = WEBMENTION_REL_LINK
mock_post.return_value = requests_response()
@ -1087,7 +1062,7 @@ class WebmentionTest(testutil.TestCase):
self.assert_equals(200, got.status_code)
mock_get.assert_has_calls((
self.req('https://user.com/follow#2'),
self.req('https://user.com/follow'),
self.as2_req('https://mas.to/mrs-foo'),
))
@ -1120,7 +1095,7 @@ class WebmentionTest(testutil.TestCase):
def test_error_fragment_missing(self, mock_get, mock_post):
mock_get.return_value = requests_response(
self.follow_fragment_html, url='https://user.com/follow#a',
self.follow_fragment_html, url='https://user.com/follow',
content_type=CONTENT_TYPE_HTML)
got = self.client.post('/webmention', data={
@ -1128,6 +1103,9 @@ class WebmentionTest(testutil.TestCase):
'target': 'https://fed.brid.gy/',
})
self.assert_equals(400, got.status_code)
mock_get.assert_has_calls((
self.req('https://user.com/follow'),
))
def test_error(self, mock_get, mock_post):
mock_get.side_effect = [self.follow, self.actor]

Wyświetl plik

@ -48,16 +48,18 @@ class FakeProtocol(protocol.Protocol):
@classmethod
def send(cls, obj, url, log_data=True):
logger.info(f'FakeProtocol.send {url}')
sent.append((obj, url))
cls.sent.append((obj, url))
cls.objects[obj.key.id()] = obj
@classmethod
def fetch(cls, id):
logger.info(f'FakeProtocol.send {id}')
def fetch(cls, obj):
id = obj.key.id()
logger.info(f'FakeProtocol.load {id}')
cls.fetched.append(id)
if id in cls.objects:
return cls.objects[id]
obj.our_as1 = cls.objects[id]
return obj
raise requests.HTTPError(response=util.Struct(status_code='410'))

Wyświetl plik

@ -23,6 +23,7 @@ import activitypub
from app import app
import common
from models import Follower, Object, Target, User
from protocol import Protocol
logger = logging.getLogger(__name__)
@ -30,7 +31,7 @@ logger = logging.getLogger(__name__)
TASKS_LOCATION = 'us-central1'
class Webmention(View):
class Webmention(Protocol):
"""Webmention protocol implementation."""
LABEL = 'webmention'
@ -49,16 +50,22 @@ class Webmention(View):
return True
@classmethod
def fetch(cls, url):
def fetch(cls, obj):
"""Fetches a URL over HTTP and extracts its microformats2.
See :meth:`Protocol.fetch` for details.
Follows redirects, but doesn't change the original URL in obj's id! The
:class:`Model` class doesn't allow that anyway, but more importantly, we
want to preserve that original URL becase other objects may refer to it
instead of the final redirect destination URL.
See :meth:`Protocol.fetch` for other background.
"""
url = obj.key.id()
try:
parsed = util.fetch_mf2(url, gateway=True,
require_backlink=common.host_url().rstrip('/'))
except ValueError as e:
logging.info(str(e))
logger.info(str(e))
error(str(e))
if parsed is None:
@ -80,10 +87,10 @@ class Webmention(View):
# duplicated in microformats2.json_to_object
author = util.get_first(props, 'author')
if not isinstance(author, dict):
logging.info(f'Fetching full authorship for author {author}')
logger.info(f'Fetching full authorship for author {author}')
author = mf2util.find_author({'items': [entry]}, hentry=entry,
fetch_mf2_func=util.fetch_mf2)
logging.info(f'Got: {author}')
logger.info(f'Got: {author}')
if author:
props['author'] = [{
"type": ["h-card"],
@ -93,18 +100,7 @@ class Webmention(View):
},
}]
obj = Object.get_by_id(parsed['url'])
if obj:
orig_as1 = obj.as1
obj.clear()
obj.mf2 = entry
obj.changed = as1.activity_changed(orig_as1, obj.as1)
obj.new = False
else:
obj = Object(id=parsed['url'], mf2=entry)
obj.new = True
obj.changed = False
obj.mf2 = entry
return obj
@ -145,7 +141,7 @@ class WebmentionView(View):
# fetch source page
try:
obj = Webmention.fetch(source)
obj = Webmention.load(source, refresh=True)
except ValueError as e:
error(f'Bad source URL: {source}: {e}')
@ -153,7 +149,7 @@ class WebmentionView(View):
props = obj.mf2['properties']
author_urls = microformats2.get_string_urls(props.get('author', []))
if author_urls and not g.user.is_homepage(author_urls[0]):
logging.info(f'Overriding author {author_urls[0]} with {g.user.actor_id()}')
logger.info(f'Overriding author {author_urls[0]} with {g.user.actor_id()}')
props['author'] = [g.user.actor_id()]
logger.info(f'Converted to AS1: {obj.type}: {json_dumps(obj.as1, indent=2)}')
@ -187,7 +183,7 @@ class WebmentionView(View):
},
)
msg = f'Enqueued task {task.name} to deliver to followers...'
logging.info(msg)
logger.info(msg)
return msg, 202
inboxes_to_targets = self._activitypub_targets(obj)