Resolve protocol-subdomain-wrapped ids/URLs by stripping the subdomain wrapping

Renames Object.replace_copies_with_originals => resolve_ids. As a side effect, also fixes https://console.cloud.google.com/errors/detail/CK3U3PONxv5Q;time=P30D?project=bridgy-federated
pull/696/head
Ryan Barrett 2023-10-24 10:46:57 -07:00
rodzic d12fd99b03
commit 422a240183
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
10 zmienionych plików z 162 dodań i 98 usunięć

Wyświetl plik

@ -27,9 +27,9 @@ from common import (
DOMAIN_RE,
error,
host_url,
redirect_unwrap,
redirect_wrap,
subdomain_wrap,
unwrap,
)
from models import Follower, Object, PROTOCOLS, User
from protocol import Protocol
@ -543,6 +543,8 @@ def postprocess_as2(activity, orig_obj=None, wrap=True):
# actors. wrap in our domain if necessary, then compact to just string id
# for compatibility, since many other AP implementations choke on objects.
# https://github.com/snarfed/bridgy-fed/issues/658
# TODO: expand this to general purpose compact() function and use elsewhere,
# eg in models.resolve_id
for field in 'actor', 'attributedTo', 'author':
actors = as1.get_objects(activity, field)
if wrap:
@ -850,11 +852,11 @@ def inbox(protocol=None, id=None):
#
# so, set a synthetic URL based on the follower's profile.
# https://github.com/snarfed/bridgy-fed/issues/336
follower_url = redirect_unwrap(util.get_url(activity, 'actor'))
followee_url = redirect_unwrap(util.get_url(activity, 'object'))
follower_url = unwrap(util.get_url(activity, 'actor'))
followee_url = unwrap(util.get_url(activity, 'object'))
activity.setdefault('url', f'{follower_url}#followed-{followee_url}')
obj = Object(id=activity.get('id'), as2=redirect_unwrap(activity))
obj = Object(id=activity.get('id'), as2=unwrap(activity))
return ActivityPub.receive(obj, authed_as=authed_as)

Wyświetl plik

@ -65,6 +65,10 @@ DOMAIN_BLOCKLIST = (
'twitter.com',
)
# populated in models.reset_protocol_properties
SUBDOMAIN_BASE_URL_RE = None
ID_FIELDS = ['id', 'object', 'actor', 'author', 'inReplyTo', 'url']
CACHE_TIME = timedelta(seconds=60)
USER_AGENT = 'Bridgy Fed (https://fed.brid.gy/)'
@ -172,8 +176,8 @@ def subdomain_wrap(proto, path=None):
return urljoin(f'https://{proto.ABBREV or "fed"}{SUPERDOMAIN}/', path)
def redirect_unwrap(val):
"""Removes our redirect wrapping from a URL, if it's there.
def unwrap(val, field=None):
"""Removes our subdomain/redirect wrapping from a URL, if it's there.
``val`` may be a string, dict, or list. dicts and lists are unwrapped
recursively.
@ -182,29 +186,22 @@ def redirect_unwrap(val):
Args:
val (str or dict or list)
field (str): optional field name for this value
Returns:
str: unwrapped url
"""
if isinstance(val, dict):
return {k: redirect_unwrap(v) for k, v in val.items()}
return {f: unwrap(v, field=f) for f, v in val.items()}
elif isinstance(val, list):
return [redirect_unwrap(v) for v in val]
return [unwrap(v) for v in val]
elif isinstance(val, str):
for domain in DOMAINS:
for scheme in 'http', 'https':
base = f'{scheme}://{domain}/'
redirect_prefix = f'{base}r/'
if val.startswith(redirect_prefix):
unwrapped = val.removeprefix(redirect_prefix)
if util.is_web(unwrapped):
return unwrapped
elif val.startswith(base):
path = val.removeprefix(base)
if re.match(DOMAIN_RE, path):
return f'https://{path}/'
unwrapped = SUBDOMAIN_BASE_URL_RE.sub('', val)
if field in ID_FIELDS and re.fullmatch(DOMAIN_RE, unwrapped):
unwrapped = f'https://{unwrapped}/'
return unwrapped
return val
@ -270,7 +267,6 @@ def create_task(queue, **params):
if appengine_info.LOCAL_SERVER:
logger.info(f'Running task inline: {queue} {params}')
return
return app.test_client().post(
path, data=params, headers={flask_util.CLOUD_TASKS_QUEUE_HEADER: ''})

Wyświetl plik

@ -27,7 +27,7 @@ from oauth_dropins.webutil.models import (
from oauth_dropins.webutil.util import json_dumps, json_loads
import common
from common import add, base64_to_long, long_to_base64, redirect_unwrap
from common import add, base64_to_long, long_to_base64, unwrap
import ids
# maps string label to Protocol subclass. populated by ProtocolUserMeta.
@ -53,7 +53,6 @@ OBJECT_EXPIRE_AGE = timedelta(days=90)
OPT_OUT_TAGS = frozenset(('#nobot', '#nobridge'))
logger = logging.getLogger(__name__)
@ -68,8 +67,8 @@ class Target(ndb.Model):
ATProto user DIDs, etc.
Used in :class:`google.cloud.ndb.model.StructuredProperty`\s inside
:class:`Object` and :class:`User`\;
not stored as top-level entities in the datastore.
:class:`Object` and :class:`User`; not stored as top-level entities in the
datastore.
ndb implements this by hoisting each property here into a corresponding
property on the parent entity, prefixed by the StructuredProperty name
@ -115,6 +114,10 @@ def reset_protocol_properties():
Object.source_protocol = ndb.StringProperty(
'source_protocol', choices=list(PROTOCOLS.keys()))
abbrevs = f'({"|".join(PROTOCOLS.keys())}|fed)'
common.SUBDOMAIN_BASE_URL_RE = re.compile(
rf'^https?://({abbrevs}\.brid\.gy|localhost(:8080)?)/(convert/|r/)?({abbrevs}/)?')
def _validate_atproto_did(prop, val):
if not val.startswith('did:plc:'):
@ -589,10 +592,10 @@ class Object(StringIdModel):
owner = None
if self.our_as1:
obj = redirect_unwrap(self.our_as1)
obj = self.our_as1
elif self.as2:
obj = as2.to_as1(redirect_unwrap(self.as2))
obj = as2.to_as1(unwrap(self.as2))
elif self.bsky:
owner, _, _ = parse_at_uri(self.key.id())
@ -640,7 +643,7 @@ class Object(StringIdModel):
def _object_ids(self): # id(s) of inner objects
if self.as1:
return redirect_unwrap(as1.get_ids(self.as1, 'object'))
return unwrap(as1.get_ids(self.as1, 'object'))
object_ids = ndb.ComputedProperty(_object_ids, repeated=True)
@ -923,10 +926,19 @@ class Object(StringIdModel):
{util.ellipsize(name, chars=40)}
</a>"""
def replace_copies_with_originals(self):
"""Replaces ids copied from other protocols with their original ids.
def resolve_ids(self):
"""Resolves "copy" ids, subdomain ids, etc with their originals.
Specifically, replaces these AS1 fields in place:
Specifically, resolves:
* ids in :class:`User.copies` and :class:`Object.copies`, eg ATProto
records and Nostr events that we bridged, to the ids of their
original objects in their source protocol, eg
``at://did:plc:abc/app.bsky.feed.post/123`` => ``https://mas.to/@user/456``.
* Bridgy Fed subdomain URLs to the ids embedded inside them, eg
``https://atproto.brid.gy/ap/did:plc:xyz`` => ``did:plc:xyz``
...in these AS1 fields, in place:
* ``actor``
* ``author``
@ -936,16 +948,17 @@ class Object(StringIdModel):
* ``object.id``
* ``object.inReplyTo``
* ``tags.[objectType=mention].url``
Looks up values in :attr:`User.copies` and :attr:`Object.copies` and
replaces with their key ids.
"""
if not self.as1 or not self.source_protocol:
return
outer_obj = copy.deepcopy(self.as1)
# extract ids, strip Bridgy Fed subdomain URLs
outer_obj = unwrap(self.as1)
if outer_obj != self.as1:
self.our_as1 = util.trim_nulls(outer_obj)
inner_obj = outer_obj['object'] = as1.get_object(outer_obj)
fields = 'actor', 'author', 'inReplyTo'
fields = ['actor', 'author', 'inReplyTo']
mention_tags = [t for t in (as1.get_objects(outer_obj, 'tags')
+ as1.get_objects(inner_obj, 'tags'))
if t.get('objectType') == 'mention']
@ -963,6 +976,7 @@ class Object(StringIdModel):
+ Object.query(Object.copies.uri.IN(ids)).fetch())
replaced = False
def replace(obj, field):
val = as1.get_object(obj, field).get('id')
if val:
@ -976,7 +990,7 @@ class Object(StringIdModel):
replaced = True
for obj in outer_obj, inner_obj:
for field in 'actor', 'author', 'inReplyTo':
for field in fields:
replace(obj, field)
replace(inner_obj, 'id')
@ -985,7 +999,7 @@ class Object(StringIdModel):
replace(tag, 'url')
if replaced:
self.our_as1 = outer_obj
self.our_as1 = util.trim_nulls(outer_obj)
class Follower(ndb.Model):

Wyświetl plik

@ -364,7 +364,7 @@ def fetch_objects(query, by=None):
content = util.parse_html(content).get_text()
urls = as1.object_urls(inner_obj)
id = common.redirect_unwrap(inner_obj.get('id', ''))
id = common.unwrap(inner_obj.get('id', ''))
url = urls[0] if urls else id
if (type == 'update' and
(obj.users and (g.user.is_web_url(id)

Wyświetl plik

@ -521,7 +521,7 @@ class Protocol:
logger.warning(f"actor {actor} isn't authed user {authed_as}")
# update copy ids to originals
obj.replace_copies_with_originals()
obj.resolve_ids()
# write Object to datastore
orig = obj
@ -1119,7 +1119,7 @@ class Protocol:
if not fetched:
return None
obj.replace_copies_with_originals()
obj.resolve_ids()
if obj.new is False:
obj.changed = obj.activity_changed(orig_as1)

Wyświetl plik

@ -228,7 +228,7 @@ ACCEPT = {
'id': 'https://mas.to/6d1a',
'object': 'http://localhost/user.com',
'actor': 'https://mas.to/users/swentel',
'url': 'https://mas.to/users/swentel#followed-https://user.com/',
'url': 'https://mas.to/users/swentel#followed-user.com',
'to': ['https://www.w3.org/ns/activitystreams#Public'],
},
'to': ['https://www.w3.org/ns/activitystreams#Public'],
@ -615,6 +615,11 @@ class ActivityPubTest(TestCase):
expected_obj = {
**as2.to_as1(NOTE_OBJECT),
'author': {'id': 'https://masto.foo/@author'},
'cc': [
{'id': 'https://mas.to/author/followers'},
{'id': 'https://masto.foo/@other'},
{'id': 'target'},
],
}
self.assert_object(NOTE_OBJECT['id'],
source_protocol='activitypub',
@ -622,7 +627,7 @@ class ActivityPubTest(TestCase):
type='note',
feed=[self.user.key, baz.key])
expected_create = as2.to_as1(common.redirect_unwrap(NOTE))
expected_create = as2.to_as1(common.unwrap(NOTE))
expected_create.update({
'actor': as2.to_as1(ACTOR),
'object': expected_obj,
@ -819,7 +824,7 @@ class ActivityPubTest(TestCase):
follow = {
**FOLLOW_WITH_ACTOR,
'url': 'https://mas.to/users/swentel#followed-https://user.com/',
'url': 'https://mas.to/users/swentel#followed-user.com',
}
self.assert_object('https://mas.to/6d1a',
users=[self.swentel_key],
@ -839,7 +844,9 @@ class ActivityPubTest(TestCase):
'url': FOLLOW['object'],
},
}
self._test_inbox_follow_accept(follow, ACCEPT, 200, *mocks)
accept = copy.deepcopy(ACCEPT)
accept['object']['url'] = 'https://mas.to/users/swentel#followed-https://user.com/'
self._test_inbox_follow_accept(follow, accept, 200, *mocks)
follow.update({
'actor': ACTOR,
@ -859,7 +866,7 @@ class ActivityPubTest(TestCase):
self._test_inbox_follow_accept(FOLLOW_WRAPPED, ACCEPT, 200, *mocks,
inbox_path='/ap/sharedInbox')
url = 'https://mas.to/users/swentel#followed-https://user.com/'
url = 'https://mas.to/users/swentel#followed-user.com'
self.assert_object('https://mas.to/6d1a',
users=[self.swentel_key],
notify=[self.user.key],
@ -879,7 +886,7 @@ class ActivityPubTest(TestCase):
self._test_inbox_follow_accept(FOLLOW_WRAPPED, ACCEPT, 502,
mock_head, mock_get, mock_post)
url = 'https://mas.to/users/swentel#followed-https://user.com/'
url = 'https://mas.to/users/swentel#followed-user.com'
self.assert_object('https://mas.to/6d1a',
users=[self.swentel_key],
notify=[self.user.key],
@ -971,7 +978,7 @@ class ActivityPubTest(TestCase):
# double check that follow Object doesn't have www
self.assertEqual('active', follower.status)
self.assertEqual('https://mas.to/users/swentel#followed-https://user.com/',
self.assertEqual('https://mas.to/users/swentel#followed-user.com',
follower.follow.get().as2['url'])
def test_inbox_undo_follow(self, mock_head, mock_get, mock_post):
@ -1086,11 +1093,13 @@ class ActivityPubTest(TestCase):
self.assertEqual(204, got.status_code)
mock_post.assert_not_called()
expected = {
**as2.to_as1(bad),
'actor': as2.to_as1(ACTOR),
'object': 'https://Testing – Brid.gy – Post to Mastodon 3/',
}
self.assert_object(id,
our_as1={
**as2.to_as1(bad),
'actor': as2.to_as1(ACTOR),
},
our_as1=expected,
users=[self.swentel_key],
source_protocol='activitypub',
status='ignored',

Wyświetl plik

@ -49,10 +49,10 @@ class CommonTest(TestCase):
self.assertEqual('http://ap.brid.gy/r/http://foo',
common.redirect_wrap('http://ap.brid.gy/r/http://foo'))
def test_redirect_unwrap_empty(self):
self.assertIsNone(common.redirect_unwrap(None))
def test_unwrap_empty(self):
self.assertIsNone(common.unwrap(None))
for obj in '', {}, []:
self.assertEqual(obj, common.redirect_unwrap(obj))
self.assertEqual(obj, common.unwrap(obj))
def test_subdomain_wrap(self):
self.assertEqual('https://fa.brid.gy/',
@ -63,40 +63,29 @@ class CommonTest(TestCase):
common.subdomain_wrap(UIProtocol))
def test_unwrap_protocol_subdomain(self):
self.assert_equals({
'type': 'Like',
'object': 'http://foo',
}, common.redirect_unwrap({
'type': 'Like',
'object': 'https://ap.brid.gy/r/http://foo',
}))
for input, expected in [
('https://fa.brid.gy/', ''),
('https://fa.brid.gy/ap/fake:foo', 'fake:foo'),
('https://atproto.brid.gy/convert/ap/did:plc:123', 'did:plc:123'),
]:
self.assertEqual(expected, common.unwrap(input))
def test_unwrap_not_web(self):
bad = {
'type': 'Like',
'object': 'http://localhost/r/foo bar',
}
self.assert_equals(bad, common.redirect_unwrap(bad))
def test_unwrap_protocol_subdomain_object(self):
self.assert_equals(
{'object': 'http://foo'},
common.unwrap({'object': 'https://ap.brid.gy/r/http://foo',}))
self.assert_equals(
{'object': {'id': 'https://foo.com/'}},
common.unwrap({'object': {'id': 'https://fa.brid.gy/foo.com'}}))
def test_unwrap_local_actor_urls(self):
self.assert_equals(
{'object': 'https://foo.com/'},
common.redirect_unwrap({'object': 'http://localhost/foo.com'}))
common.unwrap({'object': 'http://localhost/foo.com'}))
self.assert_equals(
{'object': {'id': 'https://foo.com/'}},
common.redirect_unwrap({'object': {'id': 'http://localhost/foo.com'}}))
def test_unwrap_protocol_subdomains(self):
self.assert_equals(
{'object': 'http://foo.com/bar'},
common.redirect_unwrap(
{'object': 'https://atproto.brid.gy/r/http://foo.com/bar'}))
self.assert_equals(
{'object': {'id': 'https://foo.com/'}},
common.redirect_unwrap(
{'object': {'id': 'https://fa.brid.gy/foo.com'}}))
common.unwrap({'object': {'id': 'http://localhost/foo.com'}}))
def test_host_url(self):
with app.test_request_context():

Wyświetl plik

@ -400,7 +400,10 @@ class ObjectTest(TestCase):
# check that it's a separate copy of the entity in the cache
# https://github.com/snarfed/bridgy-fed/issues/558#issuecomment-1603203927
loaded.our_as1 = {'a': 'b'}
self.assertEqual({'x': 'y'}, Protocol.load('foo').our_as1)
self.assertEqual({
'id': 'foo',
'x': 'y',
}, Protocol.load('foo').our_as1)
def test_put_cached_makes_copy(self):
obj = Object(id='foo', our_as1={'x': 'y'})
@ -408,7 +411,10 @@ class ObjectTest(TestCase):
obj.our_as1 = {'a': 'b'}
# don't put()
self.assertEqual({'x': 'y'}, Fake.load('foo').our_as1)
self.assertEqual({
'id': 'foo',
'x': 'y',
}, Fake.load('foo').our_as1)
def test_get_by_id_cached_makes_copy(self):
obj = Object(id='foo', our_as1={'x': 'y'})
@ -419,7 +425,10 @@ class ObjectTest(TestCase):
# check that it's a separate copy of the entity in the cache
# https://github.com/snarfed/bridgy-fed/issues/558#issuecomment-1603203927
loaded.our_as1 = {'a': 'b'}
self.assertEqual({'x': 'y'}, Protocol.load('foo').our_as1)
self.assertEqual({
'id': 'foo',
'x': 'y',
}, Protocol.load('foo').our_as1)
def test_actor_link(self):
for expected, as2 in (
@ -705,12 +714,12 @@ class ObjectTest(TestCase):
'object': {},
}, obj.key.get().as2)
def test_replace_copies_with_originals_empty(self):
def test_resolve_ids_empty(self):
obj = Object()
obj.replace_copies_with_originals()
obj.resolve_ids()
self.assertIsNone(obj.as1)
def test_replace_copies_with_originals_follow(self):
def test_resolve_ids_copies_follow(self):
follow = {
'id': 'fake:follow',
'objectType': 'activity',
@ -721,7 +730,7 @@ class ObjectTest(TestCase):
obj = Object(our_as1=follow, source_protocol='fake')
# no matching copy users
obj.replace_copies_with_originals()
obj.resolve_ids()
self.assert_equals(follow, obj.our_as1)
# matching copy users
@ -729,14 +738,14 @@ class ObjectTest(TestCase):
copies=[Target(uri='fake:alice', protocol='fake')])
self.make_user('other:bob', cls=OtherFake,
copies=[Target(uri='fake:bob', protocol='fake')])
obj.replace_copies_with_originals()
obj.resolve_ids()
self.assert_equals({
**follow,
'actor': 'other:alice',
'object': {'id': 'other:bob'},
}, obj.our_as1)
def test_replace_copies_with_originals_reply(self):
def test_resolve_ids_copies_reply(self):
reply = {
'objectType': 'activity',
'verb': 'create',
@ -756,7 +765,7 @@ class ObjectTest(TestCase):
obj = Object(our_as1=reply, source_protocol='fake')
# no matching copy users or objects
obj.replace_copies_with_originals()
obj.resolve_ids()
self.assert_equals(reply, obj.our_as1)
# matching copies
@ -769,7 +778,7 @@ class ObjectTest(TestCase):
self.store_object(id='other:reply',
copies=[Target(uri='fake:reply', protocol='fake')])
obj.replace_copies_with_originals()
obj.resolve_ids()
self.assert_equals({
'objectType': 'activity',
'verb': 'create',
@ -785,6 +794,39 @@ class ObjectTest(TestCase):
},
}, obj.our_as1)
def test_resolve_ids_subdomain_urls(self):
obj = Object(our_as1={
'objectType': 'activity',
'verb': 'create',
'id': 'https://fa.brid.gy/web/foo.com',
'object': {
'id': 'https://web.brid.gy/fa/fake:reply',
'inReplyTo': 'https://ap.brid.gy/fa/fake:post',
'author': 'https://atproto.brid.gy/ap/did:plc:123',
'tags': [{
'objectType': 'mention',
'url': 'https://ap.brid.gy/atproto/http://inst.com/@me',
}],
},
}, source_protocol='fake')
obj.resolve_ids()
self.assert_equals({
'objectType': 'activity',
'verb': 'create',
'id': 'https://foo.com/',
'object': {
'id': 'fake:reply',
'inReplyTo': 'fake:post',
'author': 'did:plc:123',
'tags': [{
'objectType': 'mention',
'url': 'http://inst.com/@me',
}],
},
}, obj.our_as1)
class FollowerTest(TestCase):
def setUp(self):

Wyświetl plik

@ -165,7 +165,10 @@ class ProtocolTest(TestCase):
Fake.fetchable['foo'] = {'x': 'y'}
loaded = Fake.load('foo')
self.assert_equals({'x': 'y'}, loaded.our_as1)
self.assertEqual({
'id': 'foo',
'x': 'y',
}, loaded.our_as1)
self.assertFalse(loaded.changed)
self.assertTrue(loaded.new)
@ -176,7 +179,10 @@ class ProtocolTest(TestCase):
self.store_object(id='foo', our_as1={'x': 'y'})
loaded = Fake.load('foo')
self.assert_equals({'x': 'y'}, loaded.our_as1)
self.assertEqual({
'id': 'foo',
'x': 'y',
}, loaded.our_as1)
self.assertFalse(loaded.changed)
self.assertFalse(loaded.new)
@ -201,7 +207,10 @@ class ProtocolTest(TestCase):
# check that it's a separate copy of the entity in the cache
# https://github.com/snarfed/bridgy-fed/issues/558#issuecomment-1603203927
loaded.our_as1 = {'a': 'b'}
self.assertEqual({'x': 'y'}, Protocol.load('foo').our_as1)
self.assertEqual({
'id': 'foo',
'x': 'y',
}, Protocol.load('foo').our_as1)
def test_load_remote_true_existing_empty(self):
Fake.fetchable['foo'] = {'x': 'y'}
@ -249,7 +258,10 @@ class ProtocolTest(TestCase):
Fake.fetchable['foo'] = {'content': 'new'}
loaded = Fake.load('foo', remote=True)
self.assert_equals({'content': 'new'}, loaded.our_as1)
self.assertEqual({
'id': 'foo',
'content': 'new',
}, loaded.our_as1)
self.assertTrue(loaded.changed)
self.assertFalse(loaded.new)
self.assertEqual(['foo'], Fake.fetched)
@ -282,7 +294,7 @@ class ProtocolTest(TestCase):
with self.assertRaises(AssertionError):
Fake.load('nope', local=False, remote=False)
def test_load_replace_copies_with_originals(self):
def test_load_resolve_ids(self):
follow = {
'objectType': 'activity',
'verb': 'follow',

Wyświetl plik

@ -436,7 +436,7 @@ class TestCase(unittest.TestCase, testutil.Asserts):
self.assertSetEqual(set(object_ids), set(got.object_ids))
if expected_as1 := props.pop('as1', None):
self.assert_equals(common.redirect_unwrap(expected_as1), got.as1)
self.assert_equals(common.unwrap(expected_as1), got.as1)
if got.mf2:
got.mf2.pop('url', None)