webmention => AP: handle multiple in-reply-to links

fixes #65. thanks again for reporting, @nekr0z!
thib
Ryan Barrett 2020-06-06 08:39:44 -07:00
rodzic 2a91f01393
commit 2c15820ef6
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
4 zmienionych plików z 110 dodań i 83 usunięć

Wyświetl plik

@ -28,6 +28,7 @@ l/
l3/
local/
local3/
local3.7/
pydoc/
pydocs/
python3/

2
.gitignore vendored
Wyświetl plik

@ -3,6 +3,6 @@
datastore.dat*
/docs/_build/
/l
/local
/local*
private_notes
TAGS

Wyświetl plik

@ -114,6 +114,7 @@ class WebmentionTest(testutil.TestCase):
<div class="h-entry">
<a class="u-url" href="http://a/reply"></a>
<p class="e-content p-name">
<a class="u-in-reply-to" href="http://not/fediverse"></a>
<a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a>
<a href="http://localhost/"></a>
</p>
@ -163,7 +164,10 @@ class WebmentionTest(testutil.TestCase):
'id': 'http://localhost/r/http://a/reply',
'url': 'http://localhost/r/http://a/reply',
'name': 'foo ☕ bar',
'content': '<a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a>\n<a href="http://localhost/"></a>',
'content': """\
<a class="u-in-reply-to" href="http://not/fediverse"></a>
<a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a>
<a href="http://localhost/"></a>""",
'inReplyTo': 'tag:orig,2017:as2',
'cc': [
AS2_PUBLIC_AUDIENCE,
@ -250,7 +254,12 @@ class WebmentionTest(testutil.TestCase):
},
}
self.activitypub_gets = [self.reply, self.orig_as2, self.actor]
self.not_fediverse = requests_response("""\
<html>
<body>foo</body>
</html>
""", url='http://not/fediverse', content_type=CONTENT_TYPE_HTML)
self.activitypub_gets = [self.reply, self.not_fediverse, self.orig_as2, self.actor]
def verify_salmon(self, mock_post):
args, kwargs = mock_post.call_args
@ -365,6 +374,7 @@ class WebmentionTest(testutil.TestCase):
mock_get.assert_has_calls((
self.req('http://a/reply'),
self.req('http://not/fediverse', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/post', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/author', headers=CONNEG_HEADERS_AS2_HTML),
))
@ -421,7 +431,8 @@ class WebmentionTest(testutil.TestCase):
orig_as2_resp = requests_response(
self.orig_as2_data, content_type=CONTENT_TYPE_AS2 + '; charset=utf-8')
mock_get.side_effect = [self.reply, orig_as2_resp, self.actor]
mock_get.side_effect = [self.reply, self.not_fediverse, orig_as2_resp,
self.actor]
mock_post.return_value = requests_response('abc xyz', status=203)
got = application.get_response(
@ -433,6 +444,7 @@ class WebmentionTest(testutil.TestCase):
mock_get.assert_has_calls((
self.req('http://a/reply'),
self.req('http://not/fediverse', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/post', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/author', headers=CONNEG_HEADERS_AS2_HTML),
))
@ -492,8 +504,8 @@ class WebmentionTest(testutil.TestCase):
self.assertEqual(self.repost_mf2, json_loads(resp.source_mf2))
def test_activitypub_link_rel_alternate_as2(self, mock_get, mock_post):
mock_get.side_effect = [self.reply, self.orig_html_as2, self.orig_as2,
self.actor]
mock_get.side_effect = [self.reply, self.not_fediverse,
self.orig_html_as2, self.orig_as2, self.actor]
mock_post.return_value = requests_response('abc xyz')
got = application.get_response(
@ -505,6 +517,7 @@ class WebmentionTest(testutil.TestCase):
mock_get.assert_has_calls((
self.req('http://a/reply'),
self.req('http://not/fediverse', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/post', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/as2', headers=CONNEG_HEADERS_AS2),
self.req('http://orig/author', headers=CONNEG_HEADERS_AS2_HTML),
@ -668,7 +681,7 @@ class WebmentionTest(testutil.TestCase):
mock_get.assert_has_calls((
self.req('http://a/follow'),
self.req('http://followee', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://followee/', headers=CONNEG_HEADERS_AS2_HTML),
))
args, kwargs = mock_post.call_args
@ -681,14 +694,15 @@ class WebmentionTest(testutil.TestCase):
rsa_key = kwargs['auth'].header_signer._rsa._key
self.assertEqual(self.key.private_pem(), rsa_key.exportKey())
resp = Response.get_by_id('http://a/follow http://followee')
resp = Response.get_by_id('http://a/follow http://followee/')
self.assertEqual('out', resp.direction)
self.assertEqual('activitypub', resp.protocol)
self.assertEqual('complete', resp.status)
self.assertEqual(self.follow_mf2, json_loads(resp.source_mf2))
def test_salmon_reply(self, mock_get, mock_post):
mock_get.side_effect = [self.reply, self.orig_html_atom, self.orig_atom]
mock_get.side_effect = [self.reply, self.not_fediverse,
self.orig_html_atom, self.orig_atom]
got = application.get_response(
'/webmention', method='POST', body=urlencode({
@ -699,6 +713,7 @@ class WebmentionTest(testutil.TestCase):
mock_get.assert_has_calls((
self.req('http://a/reply'),
self.req('http://not/fediverse', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/post', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/atom'),
))
@ -716,10 +731,12 @@ class WebmentionTest(testutil.TestCase):
self.assertEqual({
'type': 'text/html',
'href': 'http://orig/post',
'ref': 'tag:fed.brid.gy,2017-08-22:orig-post'
'ref': 'tag:fed.brid.gy,2017-08-22:orig-post',
}, entry['thr_in-reply-to'])
self.assertEqual(
'<a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a><br></br>\n<a href="http://localhost/"></a>',
self.assertEqual("""\
<a class="u-in-reply-to" href="http://not/fediverse"></a><br></br>
<a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a><br></br>
<a href="http://localhost/"></a>""",
entry.content[0]['value'])
resp = Response.get_by_id('http://a/reply http://orig/post')
@ -780,7 +797,8 @@ class WebmentionTest(testutil.TestCase):
'href': 'http://orig/@ryan/salmon',
}],
})
mock_get.side_effect = [self.reply, self.orig_html_atom, orig_atom, webfinger]
mock_get.side_effect = [self.reply, self.not_fediverse,
self.orig_html_atom, orig_atom, webfinger]
got = application.get_response('/webmention', method='POST', body=urlencode({
'source': 'http://a/reply',
@ -798,7 +816,7 @@ class WebmentionTest(testutil.TestCase):
<html>
<body>foo</body>
</html>""", 'http://orig/url')
mock_get.side_effect = [self.reply, orig_no_atom]
mock_get.side_effect = [self.reply, self.not_fediverse, orig_no_atom]
got = application.get_response('/webmention', method='POST', body=urlencode({
'source': 'http://a/reply',
@ -816,7 +834,8 @@ class WebmentionTest(testutil.TestCase):
<link href='atom/1' rel='alternate' type='application/atom+xml'>
</meta>
</html>""", 'http://orig/url')
mock_get.side_effect = [self.reply, orig_relative, self.orig_atom]
mock_get.side_effect = [self.reply, self.not_fediverse, orig_relative,
self.orig_atom]
got = application.get_response('/webmention', method='POST', body=urlencode({
'source': 'http://a/reply',
@ -836,7 +855,8 @@ class WebmentionTest(testutil.TestCase):
<link href='atom/1' rel='alternate' type='application/atom+xml'>
</meta>
</html>""", 'http://orig/url')
mock_get.side_effect = [self.reply, orig_base, self.orig_atom]
mock_get.side_effect = [self.reply, self.not_fediverse, orig_base,
self.orig_atom]
got = application.get_response('/webmention', method='POST', body=urlencode({
'source': 'http://a/reply',

Wyświetl plik

@ -70,16 +70,6 @@ class WebmentionHandler(common.Handler):
self.try_activitypub() or self.try_salmon()
# if self.source_domain not in SKIP_EMAIL_DOMAINS:
# try:
# msg = 'Bridgy Fed: new webmention from %s' % source
# mail.send_mail(
# sender='admin@bridgy-federated.appspotmail.com',
# to='bridgy-fed@ryanb.org',
# subject=msg, body=msg)
# except BaseException:
# logging.warning('Error sending email', exc_info=True)
def try_activitypub(self):
"""Returns True if we attempted ActivityPub delivery, False otherwise."""
targets = self._activitypub_targets()
@ -88,6 +78,7 @@ class WebmentionHandler(common.Handler):
key = MagicKey.get_or_create(self.source_domain)
error = None
last_success = None
# TODO: collect by inbox, add 'to' fields, de-dupe inboxes and recipients
@ -102,6 +93,7 @@ class WebmentionHandler(common.Handler):
try:
last = activitypub.send(source_activity, inbox, self.source_domain)
resp.status = 'complete'
last_success = last
except BaseException as e:
error = e
resp.status = 'error'
@ -109,37 +101,38 @@ class WebmentionHandler(common.Handler):
resp.put()
# Pass the AP response status code and body through as our response
if not error:
self.response.status_int = last.status_code
self.response.write(last.text)
if last_success:
self.response.status_int = last_success.status_code
self.response.write(last_success.text)
elif isinstance(error, requests.HTTPError):
self.response.status_int = error.status_code
self.response.write(error.text)
else:
self.response.write(str(error))
return not error
return bool(last_success)
def _single_target(self):
def _targets(self):
"""
Returns: string URL, the source's inReplyTo or object (if appropriate)
Returns: list of string URLs, the source's inReplyTos or objects
(if appropriate)
"""
target = util.get_first(self.source_obj, 'inReplyTo')
if target:
return util.get_url(target)
targets = util.get_urls(self.source_obj, 'inReplyTo')
if targets:
return targets
if self.source_obj.get('verb') in source.VERBS_WITH_OBJECT:
return util.get_url(util.get_first(self.source_obj, 'object'))
return util.get_urls(self.source_obj, 'object')
def _activitypub_targets(self):
"""
Returns: list of (Response, string inbox URL)
"""
# if there's an in-reply-to, like-of, or repost-of, that's the target.
# if there's in-reply-to, like-of, or repost-of, they're the targets.
# otherwise, it's all followers' inboxes.
target = self._single_target()
targets = self._targets()
if not target:
if not targets:
# interpret this as a Create or Update, deliver it to followers
inboxes = []
for follower in Follower.query().filter(
@ -157,57 +150,67 @@ class WebmentionHandler(common.Handler):
inbox)
for inbox in inboxes if inbox]
# fetch target page as AS2 object
try:
self.target_resp = common.get_as2(target)
except (requests.HTTPError, exc.HTTPBadGateway) as e:
self.target_resp = getattr(e, 'response', None)
if self.target_resp and self.target_resp.status_code // 100 == 2:
content_type = common.content_type(self.target_resp) or ''
if content_type.startswith('text/html'):
# TODO: pass e.response to try_salmon()'s target_resp
return False # make post() try Salmon
raise
target_url = self.target_resp.url or target
resps_and_inbox_urls = []
for target in targets:
# fetch target page as AS2 object
try:
self.target_resp = common.get_as2(target)
except (requests.HTTPError, exc.HTTPBadGateway) as e:
self.target_resp = getattr(e, 'response', None)
if self.target_resp and self.target_resp.status_code // 100 == 2:
content_type = common.content_type(self.target_resp) or ''
if content_type.startswith('text/html'):
# TODO: pass e.response to try_salmon()'s target_resp
continue # give up
raise
target_url = self.target_resp.url or target
resp = Response.get_or_create(
source=self.source_url, target=target_url, direction='out',
protocol='activitypub', source_mf2=json_dumps(self.source_mf2))
resp = Response.get_or_create(
source=self.source_url, target=target_url, direction='out',
protocol='activitypub', source_mf2=json_dumps(self.source_mf2))
# find target's inbox
target_obj = self.target_resp.json()
resp.target_as2 = json_dumps(target_obj)
inbox_url = target_obj.get('inbox')
# find target's inbox
target_obj = self.target_resp.json()
resp.target_as2 = json_dumps(target_obj)
inbox_url = target_obj.get('inbox')
if not inbox_url:
# TODO: test actor/attributedTo and not, with/without inbox
actor = (util.get_first(target_obj, 'actor') or
util.get_first(target_obj, 'attributedTo'))
if isinstance(actor, dict):
inbox_url = actor.get('inbox')
actor = actor.get('url') or actor.get('id')
if not inbox_url and not actor:
self.error('Target object has no actor or attributedTo with URL or id.')
elif not isinstance(actor, str):
self.error('Target actor or attributedTo has unexpected url or id object: %r' % actor)
if not inbox_url:
# TODO: test actor/attributedTo and not, with/without inbox
actor = (util.get_first(target_obj, 'actor') or
util.get_first(target_obj, 'attributedTo'))
if isinstance(actor, dict):
inbox_url = actor.get('inbox')
actor = actor.get('url') or actor.get('id')
if not inbox_url and not actor:
self.error('Target object has no actor or attributedTo with URL or id.')
elif not isinstance(actor, str):
self.error('Target actor or attributedTo has unexpected url or id object: %r' % actor)
if not inbox_url:
# fetch actor as AS object
actor = common.get_as2(actor).json()
inbox_url = actor.get('inbox')
if not inbox_url:
# fetch actor as AS object
actor = common.get_as2(actor).json()
inbox_url = actor.get('inbox')
if not inbox_url:
# TODO: probably need a way to save errors like this so that we can
# return them if ostatus fails too.
# self.error('Target actor has no inbox')
return []
if not inbox_url:
# TODO: probably need a way to save errors like this so that we can
# return them if ostatus fails too.
# self.error('Target actor has no inbox')
continue
inbox_url = urllib.parse.urljoin(target_url, inbox_url)
return [(resp, inbox_url)]
inbox_url = urllib.parse.urljoin(target_url, inbox_url)
resps_and_inbox_urls.append((resp, inbox_url))
return resps_and_inbox_urls
def try_salmon(self):
"""Returns True if we attempted OStatus delivery. Raises otherwise."""
target = self.target_resp.url if self.target_resp else self._single_target()
target = None
if self.target_resp:
target = self.target_resp.url
else:
targets = self._targets()
if targets:
target = targets[0]
if not target:
logging.warning("No targets or followers. Ignoring.")
return False
@ -233,8 +236,9 @@ class WebmentionHandler(common.Handler):
resp: Response
"""
# fetch target HTML page, extract Atom rel-alternate link
target = resp.target()
if not self.target_resp:
self.target_resp = common.requests_get(resp.target())
self.target_resp = common.requests_get(target)
parsed = util.parse_html(self.target_resp)
atom_url = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
@ -258,7 +262,9 @@ class WebmentionHandler(common.Handler):
in_reply_to = self.source_obj.get('inReplyTo')
source_obj_obj = self.source_obj.get('object')
if in_reply_to:
in_reply_to[0]['id'] = target_id
for elem in in_reply_to:
if elem.get('url') == target:
elem['id'] = target_id
elif isinstance(source_obj_obj, dict):
source_obj_obj['id'] = target_id