webmention => AP: handle multiple in-reply-to links

fixes #65. thanks again for reporting, @nekr0z!
thib
Ryan Barrett 2020-06-06 08:39:44 -07:00
rodzic 2a91f01393
commit 2c15820ef6
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
4 zmienionych plików z 110 dodań i 83 usunięć

Wyświetl plik

@ -28,6 +28,7 @@ l/
l3/ l3/
local/ local/
local3/ local3/
local3.7/
pydoc/ pydoc/
pydocs/ pydocs/
python3/ python3/

2
.gitignore vendored
Wyświetl plik

@ -3,6 +3,6 @@
datastore.dat* datastore.dat*
/docs/_build/ /docs/_build/
/l /l
/local /local*
private_notes private_notes
TAGS TAGS

Wyświetl plik

@ -114,6 +114,7 @@ class WebmentionTest(testutil.TestCase):
<div class="h-entry"> <div class="h-entry">
<a class="u-url" href="http://a/reply"></a> <a class="u-url" href="http://a/reply"></a>
<p class="e-content p-name"> <p class="e-content p-name">
<a class="u-in-reply-to" href="http://not/fediverse"></a>
<a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a> <a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a>
<a href="http://localhost/"></a> <a href="http://localhost/"></a>
</p> </p>
@ -163,7 +164,10 @@ class WebmentionTest(testutil.TestCase):
'id': 'http://localhost/r/http://a/reply', 'id': 'http://localhost/r/http://a/reply',
'url': 'http://localhost/r/http://a/reply', 'url': 'http://localhost/r/http://a/reply',
'name': 'foo ☕ bar', 'name': 'foo ☕ bar',
'content': '<a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a>\n<a href="http://localhost/"></a>', 'content': """\
<a class="u-in-reply-to" href="http://not/fediverse"></a>
<a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a>
<a href="http://localhost/"></a>""",
'inReplyTo': 'tag:orig,2017:as2', 'inReplyTo': 'tag:orig,2017:as2',
'cc': [ 'cc': [
AS2_PUBLIC_AUDIENCE, AS2_PUBLIC_AUDIENCE,
@ -250,7 +254,12 @@ class WebmentionTest(testutil.TestCase):
}, },
} }
self.activitypub_gets = [self.reply, self.orig_as2, self.actor] self.not_fediverse = requests_response("""\
<html>
<body>foo</body>
</html>
""", url='http://not/fediverse', content_type=CONTENT_TYPE_HTML)
self.activitypub_gets = [self.reply, self.not_fediverse, self.orig_as2, self.actor]
def verify_salmon(self, mock_post): def verify_salmon(self, mock_post):
args, kwargs = mock_post.call_args args, kwargs = mock_post.call_args
@ -365,6 +374,7 @@ class WebmentionTest(testutil.TestCase):
mock_get.assert_has_calls(( mock_get.assert_has_calls((
self.req('http://a/reply'), self.req('http://a/reply'),
self.req('http://not/fediverse', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/post', headers=CONNEG_HEADERS_AS2_HTML), self.req('http://orig/post', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/author', headers=CONNEG_HEADERS_AS2_HTML), self.req('http://orig/author', headers=CONNEG_HEADERS_AS2_HTML),
)) ))
@ -421,7 +431,8 @@ class WebmentionTest(testutil.TestCase):
orig_as2_resp = requests_response( orig_as2_resp = requests_response(
self.orig_as2_data, content_type=CONTENT_TYPE_AS2 + '; charset=utf-8') self.orig_as2_data, content_type=CONTENT_TYPE_AS2 + '; charset=utf-8')
mock_get.side_effect = [self.reply, orig_as2_resp, self.actor] mock_get.side_effect = [self.reply, self.not_fediverse, orig_as2_resp,
self.actor]
mock_post.return_value = requests_response('abc xyz', status=203) mock_post.return_value = requests_response('abc xyz', status=203)
got = application.get_response( got = application.get_response(
@ -433,6 +444,7 @@ class WebmentionTest(testutil.TestCase):
mock_get.assert_has_calls(( mock_get.assert_has_calls((
self.req('http://a/reply'), self.req('http://a/reply'),
self.req('http://not/fediverse', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/post', headers=CONNEG_HEADERS_AS2_HTML), self.req('http://orig/post', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/author', headers=CONNEG_HEADERS_AS2_HTML), self.req('http://orig/author', headers=CONNEG_HEADERS_AS2_HTML),
)) ))
@ -492,8 +504,8 @@ class WebmentionTest(testutil.TestCase):
self.assertEqual(self.repost_mf2, json_loads(resp.source_mf2)) self.assertEqual(self.repost_mf2, json_loads(resp.source_mf2))
def test_activitypub_link_rel_alternate_as2(self, mock_get, mock_post): def test_activitypub_link_rel_alternate_as2(self, mock_get, mock_post):
mock_get.side_effect = [self.reply, self.orig_html_as2, self.orig_as2, mock_get.side_effect = [self.reply, self.not_fediverse,
self.actor] self.orig_html_as2, self.orig_as2, self.actor]
mock_post.return_value = requests_response('abc xyz') mock_post.return_value = requests_response('abc xyz')
got = application.get_response( got = application.get_response(
@ -505,6 +517,7 @@ class WebmentionTest(testutil.TestCase):
mock_get.assert_has_calls(( mock_get.assert_has_calls((
self.req('http://a/reply'), self.req('http://a/reply'),
self.req('http://not/fediverse', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/post', headers=CONNEG_HEADERS_AS2_HTML), self.req('http://orig/post', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/as2', headers=CONNEG_HEADERS_AS2), self.req('http://orig/as2', headers=CONNEG_HEADERS_AS2),
self.req('http://orig/author', headers=CONNEG_HEADERS_AS2_HTML), self.req('http://orig/author', headers=CONNEG_HEADERS_AS2_HTML),
@ -668,7 +681,7 @@ class WebmentionTest(testutil.TestCase):
mock_get.assert_has_calls(( mock_get.assert_has_calls((
self.req('http://a/follow'), self.req('http://a/follow'),
self.req('http://followee', headers=CONNEG_HEADERS_AS2_HTML), self.req('http://followee/', headers=CONNEG_HEADERS_AS2_HTML),
)) ))
args, kwargs = mock_post.call_args args, kwargs = mock_post.call_args
@ -681,14 +694,15 @@ class WebmentionTest(testutil.TestCase):
rsa_key = kwargs['auth'].header_signer._rsa._key rsa_key = kwargs['auth'].header_signer._rsa._key
self.assertEqual(self.key.private_pem(), rsa_key.exportKey()) self.assertEqual(self.key.private_pem(), rsa_key.exportKey())
resp = Response.get_by_id('http://a/follow http://followee') resp = Response.get_by_id('http://a/follow http://followee/')
self.assertEqual('out', resp.direction) self.assertEqual('out', resp.direction)
self.assertEqual('activitypub', resp.protocol) self.assertEqual('activitypub', resp.protocol)
self.assertEqual('complete', resp.status) self.assertEqual('complete', resp.status)
self.assertEqual(self.follow_mf2, json_loads(resp.source_mf2)) self.assertEqual(self.follow_mf2, json_loads(resp.source_mf2))
def test_salmon_reply(self, mock_get, mock_post): def test_salmon_reply(self, mock_get, mock_post):
mock_get.side_effect = [self.reply, self.orig_html_atom, self.orig_atom] mock_get.side_effect = [self.reply, self.not_fediverse,
self.orig_html_atom, self.orig_atom]
got = application.get_response( got = application.get_response(
'/webmention', method='POST', body=urlencode({ '/webmention', method='POST', body=urlencode({
@ -699,6 +713,7 @@ class WebmentionTest(testutil.TestCase):
mock_get.assert_has_calls(( mock_get.assert_has_calls((
self.req('http://a/reply'), self.req('http://a/reply'),
self.req('http://not/fediverse', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/post', headers=CONNEG_HEADERS_AS2_HTML), self.req('http://orig/post', headers=CONNEG_HEADERS_AS2_HTML),
self.req('http://orig/atom'), self.req('http://orig/atom'),
)) ))
@ -716,10 +731,12 @@ class WebmentionTest(testutil.TestCase):
self.assertEqual({ self.assertEqual({
'type': 'text/html', 'type': 'text/html',
'href': 'http://orig/post', 'href': 'http://orig/post',
'ref': 'tag:fed.brid.gy,2017-08-22:orig-post' 'ref': 'tag:fed.brid.gy,2017-08-22:orig-post',
}, entry['thr_in-reply-to']) }, entry['thr_in-reply-to'])
self.assertEqual( self.assertEqual("""\
'<a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a><br></br>\n<a href="http://localhost/"></a>', <a class="u-in-reply-to" href="http://not/fediverse"></a><br></br>
<a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a><br></br>
<a href="http://localhost/"></a>""",
entry.content[0]['value']) entry.content[0]['value'])
resp = Response.get_by_id('http://a/reply http://orig/post') resp = Response.get_by_id('http://a/reply http://orig/post')
@ -780,7 +797,8 @@ class WebmentionTest(testutil.TestCase):
'href': 'http://orig/@ryan/salmon', 'href': 'http://orig/@ryan/salmon',
}], }],
}) })
mock_get.side_effect = [self.reply, self.orig_html_atom, orig_atom, webfinger] mock_get.side_effect = [self.reply, self.not_fediverse,
self.orig_html_atom, orig_atom, webfinger]
got = application.get_response('/webmention', method='POST', body=urlencode({ got = application.get_response('/webmention', method='POST', body=urlencode({
'source': 'http://a/reply', 'source': 'http://a/reply',
@ -798,7 +816,7 @@ class WebmentionTest(testutil.TestCase):
<html> <html>
<body>foo</body> <body>foo</body>
</html>""", 'http://orig/url') </html>""", 'http://orig/url')
mock_get.side_effect = [self.reply, orig_no_atom] mock_get.side_effect = [self.reply, self.not_fediverse, orig_no_atom]
got = application.get_response('/webmention', method='POST', body=urlencode({ got = application.get_response('/webmention', method='POST', body=urlencode({
'source': 'http://a/reply', 'source': 'http://a/reply',
@ -816,7 +834,8 @@ class WebmentionTest(testutil.TestCase):
<link href='atom/1' rel='alternate' type='application/atom+xml'> <link href='atom/1' rel='alternate' type='application/atom+xml'>
</meta> </meta>
</html>""", 'http://orig/url') </html>""", 'http://orig/url')
mock_get.side_effect = [self.reply, orig_relative, self.orig_atom] mock_get.side_effect = [self.reply, self.not_fediverse, orig_relative,
self.orig_atom]
got = application.get_response('/webmention', method='POST', body=urlencode({ got = application.get_response('/webmention', method='POST', body=urlencode({
'source': 'http://a/reply', 'source': 'http://a/reply',
@ -836,7 +855,8 @@ class WebmentionTest(testutil.TestCase):
<link href='atom/1' rel='alternate' type='application/atom+xml'> <link href='atom/1' rel='alternate' type='application/atom+xml'>
</meta> </meta>
</html>""", 'http://orig/url') </html>""", 'http://orig/url')
mock_get.side_effect = [self.reply, orig_base, self.orig_atom] mock_get.side_effect = [self.reply, self.not_fediverse, orig_base,
self.orig_atom]
got = application.get_response('/webmention', method='POST', body=urlencode({ got = application.get_response('/webmention', method='POST', body=urlencode({
'source': 'http://a/reply', 'source': 'http://a/reply',

Wyświetl plik

@ -70,16 +70,6 @@ class WebmentionHandler(common.Handler):
self.try_activitypub() or self.try_salmon() self.try_activitypub() or self.try_salmon()
# if self.source_domain not in SKIP_EMAIL_DOMAINS:
# try:
# msg = 'Bridgy Fed: new webmention from %s' % source
# mail.send_mail(
# sender='admin@bridgy-federated.appspotmail.com',
# to='bridgy-fed@ryanb.org',
# subject=msg, body=msg)
# except BaseException:
# logging.warning('Error sending email', exc_info=True)
def try_activitypub(self): def try_activitypub(self):
"""Returns True if we attempted ActivityPub delivery, False otherwise.""" """Returns True if we attempted ActivityPub delivery, False otherwise."""
targets = self._activitypub_targets() targets = self._activitypub_targets()
@ -88,6 +78,7 @@ class WebmentionHandler(common.Handler):
key = MagicKey.get_or_create(self.source_domain) key = MagicKey.get_or_create(self.source_domain)
error = None error = None
last_success = None
# TODO: collect by inbox, add 'to' fields, de-dupe inboxes and recipients # TODO: collect by inbox, add 'to' fields, de-dupe inboxes and recipients
@ -102,6 +93,7 @@ class WebmentionHandler(common.Handler):
try: try:
last = activitypub.send(source_activity, inbox, self.source_domain) last = activitypub.send(source_activity, inbox, self.source_domain)
resp.status = 'complete' resp.status = 'complete'
last_success = last
except BaseException as e: except BaseException as e:
error = e error = e
resp.status = 'error' resp.status = 'error'
@ -109,37 +101,38 @@ class WebmentionHandler(common.Handler):
resp.put() resp.put()
# Pass the AP response status code and body through as our response # Pass the AP response status code and body through as our response
if not error: if last_success:
self.response.status_int = last.status_code self.response.status_int = last_success.status_code
self.response.write(last.text) self.response.write(last_success.text)
elif isinstance(error, requests.HTTPError): elif isinstance(error, requests.HTTPError):
self.response.status_int = error.status_code self.response.status_int = error.status_code
self.response.write(error.text) self.response.write(error.text)
else: else:
self.response.write(str(error)) self.response.write(str(error))
return not error return bool(last_success)
def _single_target(self): def _targets(self):
""" """
Returns: string URL, the source's inReplyTo or object (if appropriate) Returns: list of string URLs, the source's inReplyTos or objects
(if appropriate)
""" """
target = util.get_first(self.source_obj, 'inReplyTo') targets = util.get_urls(self.source_obj, 'inReplyTo')
if target: if targets:
return util.get_url(target) return targets
if self.source_obj.get('verb') in source.VERBS_WITH_OBJECT: if self.source_obj.get('verb') in source.VERBS_WITH_OBJECT:
return util.get_url(util.get_first(self.source_obj, 'object')) return util.get_urls(self.source_obj, 'object')
def _activitypub_targets(self): def _activitypub_targets(self):
""" """
Returns: list of (Response, string inbox URL) Returns: list of (Response, string inbox URL)
""" """
# if there's an in-reply-to, like-of, or repost-of, that's the target. # if there's in-reply-to, like-of, or repost-of, they're the targets.
# otherwise, it's all followers' inboxes. # otherwise, it's all followers' inboxes.
target = self._single_target() targets = self._targets()
if not target: if not targets:
# interpret this as a Create or Update, deliver it to followers # interpret this as a Create or Update, deliver it to followers
inboxes = [] inboxes = []
for follower in Follower.query().filter( for follower in Follower.query().filter(
@ -157,57 +150,67 @@ class WebmentionHandler(common.Handler):
inbox) inbox)
for inbox in inboxes if inbox] for inbox in inboxes if inbox]
# fetch target page as AS2 object resps_and_inbox_urls = []
try: for target in targets:
self.target_resp = common.get_as2(target) # fetch target page as AS2 object
except (requests.HTTPError, exc.HTTPBadGateway) as e: try:
self.target_resp = getattr(e, 'response', None) self.target_resp = common.get_as2(target)
if self.target_resp and self.target_resp.status_code // 100 == 2: except (requests.HTTPError, exc.HTTPBadGateway) as e:
content_type = common.content_type(self.target_resp) or '' self.target_resp = getattr(e, 'response', None)
if content_type.startswith('text/html'): if self.target_resp and self.target_resp.status_code // 100 == 2:
# TODO: pass e.response to try_salmon()'s target_resp content_type = common.content_type(self.target_resp) or ''
return False # make post() try Salmon if content_type.startswith('text/html'):
raise # TODO: pass e.response to try_salmon()'s target_resp
target_url = self.target_resp.url or target continue # give up
raise
target_url = self.target_resp.url or target
resp = Response.get_or_create( resp = Response.get_or_create(
source=self.source_url, target=target_url, direction='out', source=self.source_url, target=target_url, direction='out',
protocol='activitypub', source_mf2=json_dumps(self.source_mf2)) protocol='activitypub', source_mf2=json_dumps(self.source_mf2))
# find target's inbox # find target's inbox
target_obj = self.target_resp.json() target_obj = self.target_resp.json()
resp.target_as2 = json_dumps(target_obj) resp.target_as2 = json_dumps(target_obj)
inbox_url = target_obj.get('inbox') inbox_url = target_obj.get('inbox')
if not inbox_url: if not inbox_url:
# TODO: test actor/attributedTo and not, with/without inbox # TODO: test actor/attributedTo and not, with/without inbox
actor = (util.get_first(target_obj, 'actor') or actor = (util.get_first(target_obj, 'actor') or
util.get_first(target_obj, 'attributedTo')) util.get_first(target_obj, 'attributedTo'))
if isinstance(actor, dict): if isinstance(actor, dict):
inbox_url = actor.get('inbox') inbox_url = actor.get('inbox')
actor = actor.get('url') or actor.get('id') actor = actor.get('url') or actor.get('id')
if not inbox_url and not actor: if not inbox_url and not actor:
self.error('Target object has no actor or attributedTo with URL or id.') self.error('Target object has no actor or attributedTo with URL or id.')
elif not isinstance(actor, str): elif not isinstance(actor, str):
self.error('Target actor or attributedTo has unexpected url or id object: %r' % actor) self.error('Target actor or attributedTo has unexpected url or id object: %r' % actor)
if not inbox_url: if not inbox_url:
# fetch actor as AS object # fetch actor as AS object
actor = common.get_as2(actor).json() actor = common.get_as2(actor).json()
inbox_url = actor.get('inbox') inbox_url = actor.get('inbox')
if not inbox_url: if not inbox_url:
# TODO: probably need a way to save errors like this so that we can # TODO: probably need a way to save errors like this so that we can
# return them if ostatus fails too. # return them if ostatus fails too.
# self.error('Target actor has no inbox') # self.error('Target actor has no inbox')
return [] continue
inbox_url = urllib.parse.urljoin(target_url, inbox_url) inbox_url = urllib.parse.urljoin(target_url, inbox_url)
return [(resp, inbox_url)] resps_and_inbox_urls.append((resp, inbox_url))
return resps_and_inbox_urls
def try_salmon(self): def try_salmon(self):
"""Returns True if we attempted OStatus delivery. Raises otherwise.""" """Returns True if we attempted OStatus delivery. Raises otherwise."""
target = self.target_resp.url if self.target_resp else self._single_target() target = None
if self.target_resp:
target = self.target_resp.url
else:
targets = self._targets()
if targets:
target = targets[0]
if not target: if not target:
logging.warning("No targets or followers. Ignoring.") logging.warning("No targets or followers. Ignoring.")
return False return False
@ -233,8 +236,9 @@ class WebmentionHandler(common.Handler):
resp: Response resp: Response
""" """
# fetch target HTML page, extract Atom rel-alternate link # fetch target HTML page, extract Atom rel-alternate link
target = resp.target()
if not self.target_resp: if not self.target_resp:
self.target_resp = common.requests_get(resp.target()) self.target_resp = common.requests_get(target)
parsed = util.parse_html(self.target_resp) parsed = util.parse_html(self.target_resp)
atom_url = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM) atom_url = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
@ -258,7 +262,9 @@ class WebmentionHandler(common.Handler):
in_reply_to = self.source_obj.get('inReplyTo') in_reply_to = self.source_obj.get('inReplyTo')
source_obj_obj = self.source_obj.get('object') source_obj_obj = self.source_obj.get('object')
if in_reply_to: if in_reply_to:
in_reply_to[0]['id'] = target_id for elem in in_reply_to:
if elem.get('url') == target:
elem['id'] = target_id
elif isinstance(source_obj_obj, dict): elif isinstance(source_obj_obj, dict):
source_obj_obj['id'] = target_id source_obj_obj['id'] = target_id