web.poll_feed_task: use URL as id since some feeds use non-URL (eg tag URI) ids

Example: the Atom feed at https://www.producthunt.com/feed. Excerpt:

```xml
<?xml version="1.0" encoding="UTF-8"?>
<feed xml:lang="en-US" xmlns="http://www.w3.org/2005/Atom">
  <id>tag:www.producthunt.com,2005:/feed</id>
  <link rel="alternate" type="text/html" href="https://www.producthunt.com"/>
  <link rel="self" type="application/atom+xml" href="https://www.producthunt.com/feed"/>
  <title>Product Hunt — The best new products, every day</title>
  <updated>2024-01-10T02:57:06-08:00</updated>
  <entry>
    <id>tag:www.producthunt.com,2005:Post/432225</id>
    <published>2023-12-31T18:15:55-08:00</published>
    <updated>2024-01-10T12:54:42-08:00</updated>
    <link rel="alternate" type="text/html" href="https://www.producthunt.com/posts/seemless"/>
    <title>Seemless</title>
    ...
```

fixes https://console.cloud.google.com/errors/detail/CKLuk-v4x8X0NQ;time=P30D?project=bridgy-federated
pull/785/head
Ryan Barrett 2024-01-11 13:00:56 -08:00
rodzic 91876a7d22
commit c8e2fafba4
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
2 zmienionych plików z 61 dodań i 5 usunięć

Wyświetl plik

@ -1779,8 +1779,8 @@ class WebTest(TestCase):
feed = """\
<?xml version="1.0" encoding="UTF-8"?>
<entry xmlns="http://www.w3.org/2005/Atom">
<uri>https://user.com/post</uri>
<content>I hereby post</content>
<link rel="alternate" type="text/html" href="https://user.com/post" />
<content>I hereby post</content>
</entry>
"""
mock_get.return_value = requests_response(
@ -1930,6 +1930,58 @@ class WebTest(TestCase):
))
assert Object.get_by_id('https://user.com/post')
# Polls a feed whose Atom entry has a tag URI <id> (tag:user.com,2999:abc) and
# a rel="alternate" link, then checks that the stored object and its AS1 use
# the link URL (https://user.com/post) as their id rather than the tag URI.
@patch('oauth_dropins.webutil.appengine_config.tasks_client.create_task')
def test_poll_feed_use_url_as_id(self, mock_create_task, mock_get, _):
# presumably disables inline task execution so the 'receive' task goes through
# the patched create_task and can be asserted on below -- TODO confirm
common.RUN_TASKS_INLINE = False
self.user.obj.mf2 = ACTOR_MF2_REL_FEED_URL
self.user.obj.put()
# single Atom entry: non-URL id, with the post's real URL only in <link>
feed = """\
<?xml version="1.0" encoding="UTF-8"?>
<entry xmlns="http://www.w3.org/2005/Atom">
<id>tag:user.com,2999:abc</id>
<link rel="alternate" type="text/html" href="https://user.com/post" />
<content>I hereby post</content>
</entry>
"""
# the mocked HTTP GET serves the entry above with the Atom content type
mock_get.return_value = requests_response(
feed, headers={'Content-Type': atom.CONTENT_TYPE})
# run the poll-feed task handler for this user's domain
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
self.assertEqual(200, got.status_code)
# a successful poll should record the poll time on the user
self.assertEqual(NOW, self.user.key.get().last_polled_feed)
# the feed URL fetched is presumably the one declared in
# ACTOR_MF2_REL_FEED_URL -- verify against that fixture
mock_get.assert_has_calls((
self.req('https://foo/feed'),
))
# the object is looked up under the link URL, not the tag URI
obj = self.assert_object('https://user.com/post',
users=[self.user.key],
source_protocol='web',
status='new',
atom=feed,
our_as1={
'objectType': 'activity',
'verb': 'post',
# activity id is taken from its url
'id': 'https://user.com/post',
'url': 'https://user.com/post',
'actor': {'id': 'https://user.com/'},
'object':{
'objectType': 'note',
# inner object id is likewise taken from its url
'id': 'https://user.com/post',
'url': 'https://user.com/post',
'author': {'id': 'https://user.com/'},
# NOTE(review): this expects a coffee-cup emoji in the content, but the
# <content> element in the feed literal above, as shown here, has none --
# confirm against the real file; may be an artifact of how the diff was rendered
'content': 'I hereby ☕ post',
},
'feed_index': 0,
},
type='post',
object_ids=['https://user.com/post'],
labels=['user', 'activity'],
)
# a 'receive' task should have been created for the newly stored object
self.assert_task(mock_create_task, 'receive', '/queue/receive',
obj=obj.key.urlsafe(),
authed_as='user.com')
def test_poll_feed_fails(self, mock_get, _):
common.RUN_TASKS_INLINE = False
self.user.obj.mf2 = ACTOR_MF2_REL_FEED_URL

10
web.py
Wyświetl plik

@ -668,9 +668,13 @@ def poll_feed_task():
for i, activity in enumerate(activities):
# default actor and author to user
activity.setdefault('actor', {}).setdefault('id', user.profile_id())
activity.setdefault('object', {})\
.setdefault('author', {})\
.setdefault('id', user.profile_id())
obj = activity.setdefault('object', {})
obj.setdefault('author', {}).setdefault('id', user.profile_id())
# use URL as id since some feeds use non-URL (eg tag URI) ids
for elem in obj, activity:
if url := elem.get('url'):
elem['id'] = elem['url']
logger.info(f'Converted to AS1: {json_dumps(activity, indent=2)}')