Protocol.load/fetch returns True/False (or None) to mean "not this protocol"

ie, nothing failed, but the given id eitehr doesn't below to the given protocol, or fetched successfully but its data belongs to a different protocol.

vs raising exceptions for fetches that fail or error, eg network connection problems, Web got HTML with microformats2 but no h-entry, ActivityPub got non-AS2, etc.

used in Protocol.for_id: when a given Protocol's fetch doesn't work, we move on and try the next protocol _if_ it returned False (ie wasn't fetchable with that protocol, but we fail fast if it raises an exception (ie fetch failed).
circle
Ryan Barrett 2023-07-14 12:45:47 -07:00
rodzic dcc9b772b1
commit e194ccb3ad
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
10 zmienionych plików z 146 dodań i 71 usunięć

Wyświetl plik

@ -25,7 +25,6 @@ from common import (
error,
host_url,
is_blocklisted,
NoMicroformats,
redirect_unwrap,
redirect_wrap,
TLD_BLOCKLIST,
@ -205,24 +204,37 @@ class ActivityPub(User, Protocol):
Signs the request with the current user's key. If not provided, defaults to
using @snarfed.org@snarfed.org's key.
See :meth:`Protocol.fetch` for more details.
Args:
obj: :class:`Object` with the id to fetch. Fills data into the as2
property.
kwargs: ignored
Returns:
True if the object was fetched and populated successfully,
False otherwise
Raises:
:class:`requests.HTTPError`, :class:`werkzeug.exceptions.HTTPException`
If we raise a werkzeug HTTPException, it will have an additional
requests_response attribute with the last requests.Response we received.
"""
url = obj.key.id()
if not util.is_web(url):
logger.info(f'{url} is not a URL')
return False
resp = None
def _error(extra_msg=None):
msg = f"Couldn't fetch {obj.key.id()} as ActivityStreams 2"
msg = f"Couldn't fetch {url} as ActivityStreams 2"
if extra_msg:
msg += ': ' + extra_msg
logger.warning(msg)
# protocol.for_id depends on us raising this when an AP network
# fetch fails. if we change that, update for_id too!
err = BadGateway(msg)
err.requests_response = resp
raise err
@ -230,7 +242,15 @@ class ActivityPub(User, Protocol):
def _get(url, headers):
"""Returns None if we fetched and populated, resp otherwise."""
nonlocal resp
resp = signed_get(url, headers=headers, gateway=True)
try:
resp = signed_get(url, headers=headers, gateway=True)
except BadGateway as e:
# ugh, this is ugly, should be something structured
if '406 Client Error' in str(e):
return
raise
if not resp.content:
_error('empty response')
elif common.content_type(resp) in as2.CONTENT_TYPES:
@ -239,24 +259,30 @@ class ActivityPub(User, Protocol):
except requests.JSONDecodeError:
_error("Couldn't decode as JSON")
obj.as2 = _get(obj.key.id(), CONNEG_HEADERS_AS2_HTML)
obj.as2 = _get(url, CONNEG_HEADERS_AS2_HTML)
if obj.as2:
return obj
return True
elif not resp:
return False
# look in HTML to find AS2 link
if common.content_type(resp) != 'text/html':
_error('no AS2 available')
logger.info('no AS2 available')
return False
parsed = util.parse_html(resp)
link = parsed.find('link', rel=('alternate', 'self'), type=(
as2.CONTENT_TYPE, as2.CONTENT_TYPE_LD))
if not (link and link['href']):
_error('no AS2 available')
logger.info('no AS2 available')
return False
obj.as2 = _get(link['href'], as2.CONNEG_HEADERS)
if obj.as2:
return obj
return True
_error()
return False
@classmethod
def serve(cls, obj):
@ -322,8 +348,10 @@ class ActivityPub(User, Protocol):
else:
raise
if key_actor.deleted:
if key_actor and key_actor.deleted:
abort(202, f'Ignoring, signer {keyId} is already deleted')
elif not key_actor:
error(f"Couldn't load {keyId} to verify signature", status=401)
key = key_actor.as2.get("publicKey", {}).get('publicKeyPem')
logger.info(f'Verifying signature for {request.path} with key {key}')
@ -641,10 +669,7 @@ def actor(protocol, domain):
cls = PROTOCOLS[protocol]
g.user = cls.get_or_create(domain)
if not g.user.obj or not g.user.obj.as1:
try:
g.user.obj = cls.load(f'https://{domain}/', gateway=True)
except NoMicroformats as e:
pass
g.user.obj = cls.load(f'https://{domain}/', gateway=True)
# TODO: unify with common.actor()
actor = g.user.as2() or {
@ -702,7 +727,8 @@ def inbox(protocol=None, domain=None):
# load user
if protocol and domain:
g.user = PROTOCOLS[protocol].get_or_create(domain, direct=False) # receiving user
# receiving user
g.user = PROTOCOLS[protocol].get_or_create(domain, direct=False)
if not g.user.direct and actor_id:
# this is a deliberate interaction with an indirect receiving user;
# create a local AP User for the sending user

Wyświetl plik

@ -13,7 +13,6 @@ from Crypto.Util import number
from flask import abort, g, make_response, request
from oauth_dropins.webutil import util, webmention
from oauth_dropins.webutil.appengine_info import DEBUG
from werkzeug.exceptions import BadRequest
logger = logging.getLogger(__name__)
@ -62,11 +61,6 @@ DOMAIN_BLOCKLIST = frozenset((
CACHE_TIME = timedelta(seconds=60)
class NoMicroformats(BadRequest):
"""Raised by :meth:`Web.fetch` when a page has no microformats2."""
pass
def base64_to_long(x):
"""Converts x from URL safe base64 encoding to a long integer.

Wyświetl plik

@ -68,7 +68,9 @@ def convert(dest, _):
# load, and maybe fetch. if it's a post/update, redirect to inner object.
obj = src_cls.load(url)
if not obj.as1:
if not obj:
error(f"Couldn't load {url}", status=404)
elif not obj.as1:
error(f'Stored object for {id} has no data', status=404)
type = as1.object_type(obj.as1)

Wyświetl plik

@ -116,6 +116,10 @@ class FollowCallback(indieauth.Callback):
# TODO(#512): follower will always be Web here, but we should generalize
# followee support in UI and here across protocols
followee = ActivityPub.load(as2_url)
if not followee:
flash(f"Couldn't load {as2_url} as AS2")
return redirect(g.user.user_page_path('following'))
followee_id = followee.as1.get('id')
inbox = followee.as2.get('inbox')
if not followee_id or not inbox:
@ -198,6 +202,8 @@ class UnfollowCallback(indieauth.Callback):
if not followee.obj or not followee.obj.as1:
# fetch to get full followee so we can find its target to deliver to
followee.obj = ActivityPub.load(followee_id)
if not followee.obj:
error("Couldn't load {followee_id} as AS2")
followee.put()
# TODO(#529): generalize

Wyświetl plik

@ -210,9 +210,13 @@ class Protocol:
for protocol in candidates:
logger.info(f'Trying {protocol.__name__}')
try:
protocol.load(id, local=False, remote=True)
logger.info(f' {protocol.__name__} owns {id}')
return protocol
if protocol.load(id, local=False, remote=True):
logger.info(f' {protocol.__name__} owns {id}')
return protocol
except werkzeug.exceptions.BadGateway:
# we tried and failed fetching the id over the network.
# this depends on ActivityPub.fetch raising this!
return None
except werkzeug.exceptions.HTTPException as e:
# internal error we generated ourselves; try next protocol
pass
@ -272,6 +276,10 @@ class Protocol:
def fetch(cls, obj, **kwargs):
"""Fetches a protocol-specific object and populates it in an :class:`Object`.
Errors are raised as exceptions. If this method returns False, the fetch
didn't fail but didn't succeed either, eg the id isn't valid for this
protocol, or the fetch didn't return valid data for this protocol.
To be implemented by subclasses.
Args:
@ -281,6 +289,10 @@ class Protocol:
Raises:
:class:`werkzeug.HTTPException` if the fetch fails
Returns:
True if the object was fetched and populated successfully,
False otherwise
"""
raise NotImplementedError()
@ -485,7 +497,7 @@ class Protocol:
if actor and actor.keys() == set(['id']):
logger.info('Fetching actor so we have name, profile photo, etc')
actor_obj = cls.load(actor['id'])
if actor_obj.as1:
if actor_obj and actor_obj.as1:
obj.our_as1 = {**obj.as1, 'actor': actor_obj.as1}
# fetch object if necessary so we can render it in feeds
@ -525,6 +537,8 @@ class Protocol:
from_cls = cls
from_obj = from_cls.load(from_id)
if not from_obj:
error(f"Couldn't load {from_id}")
if not from_obj.as1:
from_obj.our_as1 = from_as1
@ -555,7 +569,7 @@ class Protocol:
continue
to_obj = to_cls.load(to_id)
if not to_obj.as1:
if to_obj and not to_obj.as1:
to_obj.our_as1 = to_as1
to_obj.put()
@ -610,15 +624,12 @@ class Protocol:
obj: :class:`Object`, the same one if the input obj is an activity,
otherwise a new one
"""
obj_actor = as1.get_owner(obj.as1)
if not obj_actor and g.user:
obj_actor = g.user.key.id()
now = util.now().isoformat()
if obj.type not in ('note', 'article', 'comment'):
return obj
obj_actor = as1.get_owner(obj.as1)
now = util.now().isoformat()
# this is a raw post; wrap it in a create or update activity
if obj.changed:
logger.info(f'Content has changed from last time at {obj.updated}! Redelivering to all inboxes')
@ -877,8 +888,9 @@ class Protocol:
datastore after a successful remote fetch.
kwargs: passed through to :meth:`fetch()`
Returns: :class:`Object`, or None if it isn't in the datastore and remote
is False
Returns: :class:`Object`, or None if:
* it isn't fetchable, eg a non-URL string for Web
* remote is False and it isn't in the cache or datastore
Raises:
:class:`requests.HTTPError`, anything else that :meth:`fetch` raises
@ -927,7 +939,10 @@ class Protocol:
obj.new = True
obj.changed = False
cls.fetch(obj, **kwargs)
fetched = cls.fetch(obj, **kwargs)
if not fetched:
return None
if obj.new is False:
obj.changed = obj.activity_changed(orig_as1)

Wyświetl plik

@ -408,8 +408,6 @@ class ActivityPubTest(TestCase):
self.assertEqual(400, resp.status_code)
def test_inbox_reply_object(self, mock_head, mock_get, mock_post):
mock_get.side_effect = [self.as2_resp(LIKE_ACTOR)]
self._test_inbox_reply(REPLY_OBJECT, mock_head, mock_get, mock_post)
self.assert_object('http://mas.to/reply/id',
@ -419,12 +417,11 @@ class ActivityPubTest(TestCase):
# auto-generated post activity
self.assert_object(
'http://mas.to/reply/id#bridgy-fed-create',
users=[ndb.Key(ActivityPub, 'user.com'), self.user.key],
users=[self.user.key],
source_protocol='activitypub',
our_as1={
**as2.to_as1(REPLY),
'id': 'http://mas.to/reply/id#bridgy-fed-create',
'actor': as2.to_as1(LIKE_ACTOR),
'published': '2022-01-02T03:04:05+00:00',
},
status='complete',
@ -434,8 +431,6 @@ class ActivityPubTest(TestCase):
)
def test_inbox_reply_object_wrapped(self, mock_head, mock_get, mock_post):
mock_get.side_effect = [self.as2_resp(LIKE_ACTOR)]
self._test_inbox_reply(REPLY_OBJECT_WRAPPED, mock_head, mock_get, mock_post)
self.assert_object('http://mas.to/reply/id',
@ -445,12 +440,11 @@ class ActivityPubTest(TestCase):
# auto-generated post activity
self.assert_object(
'http://mas.to/reply/id#bridgy-fed-create',
users=[ndb.Key(ActivityPub, 'user.com'), self.user.key],
users=[self.user.key],
source_protocol='activitypub',
our_as1={
**as2.to_as1(REPLY),
'id': 'http://mas.to/reply/id#bridgy-fed-create',
'actor': as2.to_as1(LIKE_ACTOR),
'published': '2022-01-02T03:04:05+00:00',
},
status='complete',
@ -1742,14 +1736,18 @@ class ActivityPubUtilsTest(TestCase):
@patch('requests.get')
def test_fetch_only_html(self, mock_get):
mock_get.return_value = HTML
with self.assertRaises(BadGateway):
ActivityPub.fetch(Object(id='http://orig'))
obj = Object(id='http://orig')
self.assertFalse(ActivityPub.fetch(obj))
self.assertIsNone(obj.as1)
@patch('requests.get')
def test_fetch_not_acceptable(self, mock_get):
mock_get.return_value = NOT_ACCEPTABLE
with self.assertRaises(BadGateway):
ActivityPub.fetch(Object(id='http://orig'))
obj = Object(id='http://orig')
self.assertFalse(ActivityPub.fetch(obj))
self.assertIsNone(obj.as1)
@patch('requests.get')
def test_fetch_ssl_error(self, mock_get):
@ -1775,6 +1773,11 @@ class ActivityPubUtilsTest(TestCase):
mock_get.assert_has_calls([self.as2_req('http://the/id')])
def test_fetch_non_url(self):
obj = Object(id='x y z')
self.assertFalse(ActivityPub.fetch(obj))
self.assertIsNone(obj.as1)
@skip
def test_serve(self):
obj = Object(id='http://orig', as2=LIKE)

Wyświetl plik

@ -113,6 +113,13 @@ class ProtocolTest(TestCase):
self.assertEqual(ActivityPub, Protocol.for_id('http://ap/actor'))
self.assertIn(self.as2_req('http://ap/actor'), mock_get.mock_calls)
@patch('requests.get')
def test_for_id_activitypub_fetch_fails(self, mock_get):
mock_get.return_value = requests_response('', status=403)
self.assertIsNone(Protocol.for_id('http://ap/actor'))
self.assertIn(self.as2_req('http://ap/actor'), mock_get.mock_calls)
mock_get.assert_called_once()
@patch('requests.get')
def test_for_id_web_fetch(self, mock_get):
mock_get.return_value = requests_response(ACTOR_HTML)
@ -231,10 +238,7 @@ class ProtocolTest(TestCase):
self.assert_entities_equal(obj, Protocol.load('foo', remote=False))
def test_load_local_false_missing(self):
with self.assertRaises(requests.HTTPError) as e:
Fake.load('foo', local=False)
self.assertEqual(410, e.response.status_code)
self.assertIsNone(Fake.load('foo', local=False))
self.assertEqual(['foo'], Fake.fetched)
def test_load_local_false_existing(self):
@ -412,13 +416,12 @@ class ProtocolReceiveTest(TestCase):
'objectType': 'activity',
'verb': 'update',
'id': update_id,
'actor': 'fake:user',
'object': post_as1,
},
delivered=['shared:target'],
type='update',
labels=['user', 'activity'],
users=[g.user.key],
users=[],
)
self.assertEqual([(obj, 'shared:target')], Fake.sent)
@ -1061,7 +1064,6 @@ class ProtocolReceiveTest(TestCase):
'object': ['other:dan', 'fake:alice'],
}
# with self.assertRaises(NoContent) as e:
self.assertEqual('OK', OtherFake.receive(follow_as1))
self.assertEqual(2, len(Fake.sent))

Wyświetl plik

@ -1935,6 +1935,26 @@ class WebUtilTest(TestCase):
Web.fetch(obj)
self.assertEqual(500, e.status_code)
def test_fetch_not_html(self, mock_get, __):
mock_get.return_value = self.as2_resp({})
obj = Object(id='https://user.com/post')
self.assertFalse(Web.fetch(obj))
self.assertIsNone(obj.as1)
def test_fetch_non_url(self, mock_get, __):
obj = Object(id='x y z')
self.assertFalse(Web.fetch(obj))
self.assertIsNone(obj.as1)
def test_fetch_no_mf2(self, mock_get, __):
mock_get.return_value = requests_response(
'<html>\n<body>foo</body>\n</html>')
obj = Object(id='https://user.com/post')
self.assertFalse(Web.fetch(obj))
self.assertIsNone(obj.as1)
def test_send(self, mock_get, mock_post):
mock_get.return_value = WEBMENTION_REL_LINK
mock_post.return_value = requests_response()

Wyświetl plik

@ -76,9 +76,9 @@ class Fake(User, protocol.Protocol):
if id in cls.fetchable:
obj.our_as1 = cls.fetchable[id]
return obj
return True
raise requests.HTTPError(response=util.Struct(status_code='410'))
return False
@classmethod
def serve(cls, obj):

35
web.py
Wyświetl plik

@ -17,7 +17,7 @@ from oauth_dropins.webutil.appengine_info import APP_ID
from oauth_dropins.webutil.flask_util import error, flash
from oauth_dropins.webutil.util import json_dumps, json_loads
from oauth_dropins.webutil import webmention
from requests import HTTPError, RequestException, URLRequired
from requests import HTTPError, RequestException
from werkzeug.exceptions import BadGateway, BadRequest, HTTPException, NotFound
import common
@ -192,12 +192,14 @@ class Web(User, Protocol):
pass
# check home page
self.obj = None
self.has_hcard = False
try:
self.obj = Web.load(self.web_url(), remote=True, gateway=True)
self.has_hcard = True
except (BadRequest, NotFound, common.NoMicroformats):
self.obj = None
self.has_hcard = False
if self.obj:
self.has_hcard = True
except (BadRequest, NotFound):
pass
self.put()
return self
@ -306,6 +308,10 @@ class Web(User, Protocol):
kwargs: ignored
"""
url = obj.key.id()
if not util.is_web(url):
logger.info(f'{url} is not a URL')
return False
is_homepage = urlparse(url).path.strip('/') == ''
require_backlink = (common.host_url().rstrip('/')
@ -315,25 +321,26 @@ class Web(User, Protocol):
try:
parsed = util.fetch_mf2(url, gateway=gateway,
require_backlink=require_backlink)
except (ValueError, URLRequired) as e:
except ValueError as e:
error(str(e))
if parsed is None:
error(f'id {urlparse(url).fragment} not found in {url}')
elif not parsed.get('items'):
logger.info(f'No microformats2 found in {url}')
return False
# find mf2 item
if is_homepage:
logger.info(f"{url} is user's web url")
entry = mf2util.representative_hcard(parsed, parsed['url'])
logger.info(f'Representative h-card: {json_dumps(entry, indent=2)}')
if not entry:
msg = f"Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {parsed['url']}"
logging.info(msg)
raise common.NoMicroformats(msg)
error(f"Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {parsed['url']}")
logger.info(f'Representative h-card: {json_dumps(entry, indent=2)}')
else:
entry = mf2util.find_first_entry(parsed, ['h-entry'])
if not entry:
error(f'No microformats2 found in {url}')
error(f'No microformats2 h-entry found in {url}')
# store final URL in mf2 object, and also default url property to it,
# since that's the fallback for AS1/AS2 id
@ -372,7 +379,7 @@ class Web(User, Protocol):
}])
obj.mf2 = entry
return obj
return True
@classmethod
def serve(cls, obj):
@ -530,8 +537,8 @@ def webmention_task():
'object': source,
})
if not obj.mf2 and obj.type != 'delete':
error(f'No microformats2 found in {source}', status=304)
if not obj or (not obj.mf2 and obj.type != 'delete'):
error(f"Couldn't load {source} as microformats2 HTML", status=304)
elif obj.mf2:
# default actor to user
props = obj.mf2['properties']