kopia lustrzana https://github.com/snarfed/bridgy-fed
web feed polling: start using ETag/If-None-Match and Last-Modified/If-Modified-Since
for #791
pull/796/head
rodzic
3e49fafd0a
commit
3a8d33ee52
|
@ -1783,8 +1783,11 @@ class WebTest(TestCase):
|
||||||
<content>I hereby ☕ post</content>
|
<content>I hereby ☕ post</content>
|
||||||
</entry>
|
</entry>
|
||||||
"""
|
"""
|
||||||
mock_get.return_value = requests_response(
|
mock_get.return_value = requests_response(feed, headers={
|
||||||
feed, headers={'Content-Type': atom.CONTENT_TYPE})
|
'Content-Type': atom.CONTENT_TYPE,
|
||||||
|
'Last-Modified': 'Sat, 01 Jan 2024 01:02:03 GMT',
|
||||||
|
'ETag': '"abc123"',
|
||||||
|
})
|
||||||
|
|
||||||
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
|
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
|
||||||
self.assertEqual(200, got.status_code)
|
self.assertEqual(200, got.status_code)
|
||||||
|
@ -1792,6 +1795,8 @@ class WebTest(TestCase):
|
||||||
user = self.user.key.get()
|
user = self.user.key.get()
|
||||||
self.assertEqual(NOW, user.last_polled_feed)
|
self.assertEqual(NOW, user.last_polled_feed)
|
||||||
self.assertEqual('https://user.com/post', user.feed_last_item)
|
self.assertEqual('https://user.com/post', user.feed_last_item)
|
||||||
|
self.assertEqual('"abc123"', user.feed_etag)
|
||||||
|
self.assertEqual('Sat, 01 Jan 2024 01:02:03 GMT', user.feed_last_modified)
|
||||||
|
|
||||||
mock_get.assert_has_calls((
|
mock_get.assert_has_calls((
|
||||||
self.req('https://foo/feed'),
|
self.req('https://foo/feed'),
|
||||||
|
@ -2074,6 +2079,39 @@ class WebTest(TestCase):
|
||||||
mock_create_task.assert_not_called()
|
mock_create_task.assert_not_called()
|
||||||
mock_get.assert_not_called()
|
mock_get.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
@patch('oauth_dropins.webutil.appengine_config.tasks_client.create_task')
def test_poll_feed_etag_last_modified(self, mock_create_task, mock_get, _):
    # Conditional feed poll: the user already has an ETag and Last-Modified
    # stored, the mocked fetch answers 304 with different validator values,
    # and we check the request headers, the stored values afterward, and
    # the follow-up poll task.
    common.RUN_TASKS_INLINE = False

    # validators stored on the user before the poll
    stored_etag = '"abc123"'
    stored_last_modified = 'Sat, 01 Jan 2024 01:02:03 GMT'
    # validators carried by the mocked 304 response
    fresh_etag = '"def789"'
    fresh_last_modified = 'Sat, 99 Jan 2024 01:02:03 GMT'

    self.user.obj.mf2 = ACTOR_MF2_REL_FEED_URL
    self.user.obj.put()

    self.user.feed_etag = stored_etag
    self.user.feed_last_modified = stored_last_modified
    self.user.put()

    not_modified_headers = {
        'Last-Modified': fresh_last_modified,
        'ETag': fresh_etag,
    }
    mock_get.return_value = requests_response(
        '', status=304, headers=not_modified_headers)

    resp = self.post('/queue/poll-feed', data={'domain': 'user.com'})
    self.assertEqual(200, resp.status_code)

    # the stored user should carry the response's validators
    reloaded = self.user.key.get()
    self.assertEqual(NOW, reloaded.last_polled_feed)
    self.assertEqual(fresh_etag, reloaded.feed_etag)
    self.assertEqual(fresh_last_modified, reloaded.feed_last_modified)

    # the fetch should have been conditional on the previously stored values
    conditional_headers = {
        'If-None-Match': stored_etag,
        'If-Modified-Since': stored_last_modified,
    }
    mock_get.assert_has_calls([
        self.req('https://foo/feed', headers=conditional_headers),
    ])

    # a follow-up poll task is expected one minimum poll period after NOW
    next_poll_eta = NOW_SECONDS + web.MIN_FEED_POLL_PERIOD.total_seconds()
    self.assert_task(mock_create_task, 'poll-feed', '/queue/poll-feed',
                     domain='user.com', eta_seconds=next_poll_eta)
|
||||||
|
|
||||||
def _test_verify(self, redirects, hcard, actor, redirects_error=None):
|
def _test_verify(self, redirects, hcard, actor, redirects_error=None):
|
||||||
self.user.has_redirects = False
|
self.user.has_redirects = False
|
||||||
self.user.put()
|
self.user.put()
|
||||||
|
|
50
web.py
50
web.py
|
@ -102,6 +102,8 @@ class Web(User, Protocol):
|
||||||
last_webmention_in = ndb.DateTimeProperty(tzinfo=timezone.utc)
|
last_webmention_in = ndb.DateTimeProperty(tzinfo=timezone.utc)
|
||||||
last_polled_feed = ndb.DateTimeProperty(tzinfo=timezone.utc)
|
last_polled_feed = ndb.DateTimeProperty(tzinfo=timezone.utc)
|
||||||
feed_last_item = ndb.StringProperty() # id (URL)
|
feed_last_item = ndb.StringProperty() # id (URL)
|
||||||
|
feed_etag = ndb.StringProperty()
|
||||||
|
feed_last_modified = ndb.StringProperty()
|
||||||
|
|
||||||
# Originally, BF served Web users' AP actor ids on fed.brid.gy, eg
|
# Originally, BF served Web users' AP actor ids on fed.brid.gy, eg
|
||||||
# https://fed.brid.gy/snarfed.org . When we started adding new protocols, we
|
# https://fed.brid.gy/snarfed.org . When we started adding new protocols, we
|
||||||
|
@ -642,25 +644,35 @@ def poll_feed_task():
|
||||||
return msg
|
return msg
|
||||||
|
|
||||||
# fetch feed
|
# fetch feed
|
||||||
resp = util.requests_get(url)
|
headers = {}
|
||||||
content_type = resp.headers.get('Content-Type') or ''
|
if user.feed_etag:
|
||||||
type = FEED_TYPES.get(content_type.split(';')[0])
|
headers['If-None-Match'] = user.feed_etag
|
||||||
if type == 'atom' or (type == 'xml' and rel_type == 'atom'):
|
if user.feed_last_modified:
|
||||||
try:
|
headers['If-Modified-Since'] = user.feed_last_modified
|
||||||
activities = atom.atom_to_activities(resp.text)
|
resp = util.requests_get(url, headers=headers)
|
||||||
except ValueError as e:
|
|
||||||
error(f"Couldn't parse feed as Atom: {e}", status=502)
|
if resp.status_code == 304:
|
||||||
obj_feed_prop = {'atom': resp.text}
|
logger.info('Feed is unchanged since last poll')
|
||||||
elif type == 'rss' or (type == 'xml' and rel_type == 'rss'):
|
activities = []
|
||||||
try:
|
|
||||||
activities = rss.to_activities(resp.text)
|
|
||||||
except ValueError as e:
|
|
||||||
error(f"Couldn't parse feed as RSS: {e}", status=502)
|
|
||||||
obj_feed_prop = {'rss': resp.text}
|
|
||||||
else:
|
else:
|
||||||
msg = f'Unknown feed type {content_type}'
|
content_type = resp.headers.get('Content-Type') or ''
|
||||||
logger.info(msg)
|
type = FEED_TYPES.get(content_type.split(';')[0])
|
||||||
return msg
|
if type == 'atom' or (type == 'xml' and rel_type == 'atom'):
|
||||||
|
try:
|
||||||
|
activities = atom.atom_to_activities(resp.text)
|
||||||
|
except ValueError as e:
|
||||||
|
error(f"Couldn't parse feed as Atom: {e}", status=502)
|
||||||
|
obj_feed_prop = {'atom': resp.text}
|
||||||
|
elif type == 'rss' or (type == 'xml' and rel_type == 'rss'):
|
||||||
|
try:
|
||||||
|
activities = rss.to_activities(resp.text)
|
||||||
|
except ValueError as e:
|
||||||
|
error(f"Couldn't parse feed as RSS: {e}", status=502)
|
||||||
|
obj_feed_prop = {'rss': resp.text}
|
||||||
|
else:
|
||||||
|
msg = f'Unknown feed type {content_type}'
|
||||||
|
logger.info(msg)
|
||||||
|
return msg
|
||||||
|
|
||||||
# create Objects and receive tasks
|
# create Objects and receive tasks
|
||||||
for i, activity in enumerate(activities):
|
for i, activity in enumerate(activities):
|
||||||
|
@ -720,6 +732,8 @@ def poll_feed_task():
|
||||||
|
|
||||||
# update user
|
# update user
|
||||||
user.last_polled_feed = util.now()
|
user.last_polled_feed = util.now()
|
||||||
|
user.feed_etag = resp.headers.get('ETag')
|
||||||
|
user.feed_last_modified = resp.headers.get('Last-Modified')
|
||||||
user.put()
|
user.put()
|
||||||
|
|
||||||
return 'OK'
|
return 'OK'
|
||||||
|
|
Ładowanie…
Reference in New Issue