mirror of https://github.com/snarfed/bridgy-fed
web feed polling: start using ETag/If-None-Match and Last-Modified/If-Modified-Since
for #791
pull/796/head
parent
3e49fafd0a
commit
3a8d33ee52
|
@ -1783,8 +1783,11 @@ class WebTest(TestCase):
|
|||
<content>I hereby ☕ post</content>
|
||||
</entry>
|
||||
"""
|
||||
mock_get.return_value = requests_response(
|
||||
feed, headers={'Content-Type': atom.CONTENT_TYPE})
|
||||
mock_get.return_value = requests_response(feed, headers={
|
||||
'Content-Type': atom.CONTENT_TYPE,
|
||||
'Last-Modified': 'Sat, 01 Jan 2024 01:02:03 GMT',
|
||||
'ETag': '"abc123"',
|
||||
})
|
||||
|
||||
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
|
||||
self.assertEqual(200, got.status_code)
|
||||
|
@ -1792,6 +1795,8 @@ class WebTest(TestCase):
|
|||
user = self.user.key.get()
|
||||
self.assertEqual(NOW, user.last_polled_feed)
|
||||
self.assertEqual('https://user.com/post', user.feed_last_item)
|
||||
self.assertEqual('"abc123"', user.feed_etag)
|
||||
self.assertEqual('Sat, 01 Jan 2024 01:02:03 GMT', user.feed_last_modified)
|
||||
|
||||
mock_get.assert_has_calls((
|
||||
self.req('https://foo/feed'),
|
||||
|
@ -2074,6 +2079,39 @@ class WebTest(TestCase):
|
|||
mock_create_task.assert_not_called()
|
||||
mock_get.assert_not_called()
|
||||
|
||||
|
||||
@patch('oauth_dropins.webutil.appengine_config.tasks_client.create_task')
|
||||
def test_poll_feed_etag_last_modified(self, mock_create_task, mock_get, _):
|
||||
common.RUN_TASKS_INLINE = False
|
||||
self.user.obj.mf2 = ACTOR_MF2_REL_FEED_URL
|
||||
self.user.obj.put()
|
||||
|
||||
self.user.feed_etag = '"abc123"'
|
||||
self.user.feed_last_modified ='Sat, 01 Jan 2024 01:02:03 GMT'
|
||||
self.user.put()
|
||||
|
||||
mock_get.return_value = requests_response('', status=304, headers={
|
||||
'Last-Modified': 'Sat, 99 Jan 2024 01:02:03 GMT',
|
||||
'ETag': '"def789"',
|
||||
})
|
||||
|
||||
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
|
||||
self.assertEqual(200, got.status_code)
|
||||
|
||||
user = self.user.key.get()
|
||||
self.assertEqual(NOW, user.last_polled_feed)
|
||||
self.assertEqual('"def789"', user.feed_etag)
|
||||
self.assertEqual('Sat, 99 Jan 2024 01:02:03 GMT', user.feed_last_modified)
|
||||
|
||||
mock_get.assert_has_calls([self.req('https://foo/feed', headers={
|
||||
'If-None-Match': '"abc123"',
|
||||
'If-Modified-Since': 'Sat, 01 Jan 2024 01:02:03 GMT',
|
||||
})])
|
||||
|
||||
expected_eta = NOW_SECONDS + web.MIN_FEED_POLL_PERIOD.total_seconds()
|
||||
self.assert_task(mock_create_task, 'poll-feed', '/queue/poll-feed',
|
||||
domain='user.com', eta_seconds=expected_eta)
|
||||
|
||||
def _test_verify(self, redirects, hcard, actor, redirects_error=None):
|
||||
self.user.has_redirects = False
|
||||
self.user.put()
|
||||
|
|
50
web.py
50
web.py
|
@ -102,6 +102,8 @@ class Web(User, Protocol):
|
|||
last_webmention_in = ndb.DateTimeProperty(tzinfo=timezone.utc)
|
||||
last_polled_feed = ndb.DateTimeProperty(tzinfo=timezone.utc)
|
||||
feed_last_item = ndb.StringProperty() # id (URL)
|
||||
feed_etag = ndb.StringProperty()
|
||||
feed_last_modified = ndb.StringProperty()
|
||||
|
||||
# Originally, BF served Web users' AP actor ids on fed.brid.gy, eg
|
||||
# https://fed.brid.gy/snarfed.org . When we started adding new protocols, we
|
||||
|
@ -642,25 +644,35 @@ def poll_feed_task():
|
|||
return msg
|
||||
|
||||
# fetch feed
|
||||
resp = util.requests_get(url)
|
||||
content_type = resp.headers.get('Content-Type') or ''
|
||||
type = FEED_TYPES.get(content_type.split(';')[0])
|
||||
if type == 'atom' or (type == 'xml' and rel_type == 'atom'):
|
||||
try:
|
||||
activities = atom.atom_to_activities(resp.text)
|
||||
except ValueError as e:
|
||||
error(f"Couldn't parse feed as Atom: {e}", status=502)
|
||||
obj_feed_prop = {'atom': resp.text}
|
||||
elif type == 'rss' or (type == 'xml' and rel_type == 'rss'):
|
||||
try:
|
||||
activities = rss.to_activities(resp.text)
|
||||
except ValueError as e:
|
||||
error(f"Couldn't parse feed as RSS: {e}", status=502)
|
||||
obj_feed_prop = {'rss': resp.text}
|
||||
headers = {}
|
||||
if user.feed_etag:
|
||||
headers['If-None-Match'] = user.feed_etag
|
||||
if user.feed_last_modified:
|
||||
headers['If-Modified-Since'] = user.feed_last_modified
|
||||
resp = util.requests_get(url, headers=headers)
|
||||
|
||||
if resp.status_code == 304:
|
||||
logger.info('Feed is unchanged since last poll')
|
||||
activities = []
|
||||
else:
|
||||
msg = f'Unknown feed type {content_type}'
|
||||
logger.info(msg)
|
||||
return msg
|
||||
content_type = resp.headers.get('Content-Type') or ''
|
||||
type = FEED_TYPES.get(content_type.split(';')[0])
|
||||
if type == 'atom' or (type == 'xml' and rel_type == 'atom'):
|
||||
try:
|
||||
activities = atom.atom_to_activities(resp.text)
|
||||
except ValueError as e:
|
||||
error(f"Couldn't parse feed as Atom: {e}", status=502)
|
||||
obj_feed_prop = {'atom': resp.text}
|
||||
elif type == 'rss' or (type == 'xml' and rel_type == 'rss'):
|
||||
try:
|
||||
activities = rss.to_activities(resp.text)
|
||||
except ValueError as e:
|
||||
error(f"Couldn't parse feed as RSS: {e}", status=502)
|
||||
obj_feed_prop = {'rss': resp.text}
|
||||
else:
|
||||
msg = f'Unknown feed type {content_type}'
|
||||
logger.info(msg)
|
||||
return msg
|
||||
|
||||
# create Objects and receive tasks
|
||||
for i, activity in enumerate(activities):
|
||||
|
@ -720,6 +732,8 @@ def poll_feed_task():
|
|||
|
||||
# update user
|
||||
user.last_polled_feed = util.now()
|
||||
user.feed_etag = resp.headers.get('ETag')
|
||||
user.feed_last_modified = resp.headers.get('Last-Modified')
|
||||
user.put()
|
||||
|
||||
return 'OK'
|
||||
|
|
Loading…
Reference in New Issue