web feed polling: start using ETag/If-None-Match and Last-Modified/If-Modified-Since

for #791
pull/796/head
Ryan Barrett 2024-01-19 21:07:48 -08:00
rodzic 3e49fafd0a
commit 3a8d33ee52
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
2 zmienionych plików z 72 dodań i 20 usunięć

Wyświetl plik

@ -1783,8 +1783,11 @@ class WebTest(TestCase):
<content>I hereby post</content>
</entry>
"""
mock_get.return_value = requests_response(
feed, headers={'Content-Type': atom.CONTENT_TYPE})
mock_get.return_value = requests_response(feed, headers={
'Content-Type': atom.CONTENT_TYPE,
'Last-Modified': 'Sat, 01 Jan 2024 01:02:03 GMT',
'ETag': '"abc123"',
})
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
self.assertEqual(200, got.status_code)
@ -1792,6 +1795,8 @@ class WebTest(TestCase):
user = self.user.key.get()
self.assertEqual(NOW, user.last_polled_feed)
self.assertEqual('https://user.com/post', user.feed_last_item)
self.assertEqual('"abc123"', user.feed_etag)
self.assertEqual('Sat, 01 Jan 2024 01:02:03 GMT', user.feed_last_modified)
mock_get.assert_has_calls((
self.req('https://foo/feed'),
@ -2074,6 +2079,39 @@ class WebTest(TestCase):
mock_create_task.assert_not_called()
mock_get.assert_not_called()
# Conditional feed polling: when the user already has a stored ETag and
# Last-Modified, the poll sends them as If-None-Match / If-Modified-Since,
# and a 304 response still updates the user's poll state and schedules the
# next poll.
@patch('oauth_dropins.webutil.appengine_config.tasks_client.create_task')
# mock_create_task comes from the @patch above; mock_get and the ignored third
# argument are presumably injected by patches declared outside this view
# (TODO confirm against the class definition).
def test_poll_feed_etag_last_modified(self, mock_create_task, mock_get, _):
# NOTE(review): presumably disabled so the follow-up poll task goes through
# the patched create_task instead of being run inline - confirm.
common.RUN_TASKS_INLINE = False
self.user.obj.mf2 = ACTOR_MF2_REL_FEED_URL
self.user.obj.put()
# validators stored by a previous poll; these should be echoed back in the
# request headers asserted below
self.user.feed_etag = '"abc123"'
self.user.feed_last_modified ='Sat, 01 Jan 2024 01:02:03 GMT'
self.user.put()
# the feed responds 304 with an empty body but *different* validator values.
# NOTE(review): 'Sat, 99 Jan' is not a real date; presumably chosen to show
# the header value is stored as an opaque string - confirm.
mock_get.return_value = requests_response('', status=304, headers={
'Last-Modified': 'Sat, 99 Jan 2024 01:02:03 GMT',
'ETag': '"def789"',
})
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
self.assertEqual(200, got.status_code)
# re-fetch the user and check that the poll time and both validators were
# updated from the 304 response's headers
user = self.user.key.get()
self.assertEqual(NOW, user.last_polled_feed)
self.assertEqual('"def789"', user.feed_etag)
self.assertEqual('Sat, 99 Jan 2024 01:02:03 GMT', user.feed_last_modified)
# the feed fetch must have carried the previously stored validators
mock_get.assert_has_calls([self.req('https://foo/feed', headers={
'If-None-Match': '"abc123"',
'If-Modified-Since': 'Sat, 01 Jan 2024 01:02:03 GMT',
})])
# a follow-up poll-feed task is expected at NOW + web.MIN_FEED_POLL_PERIOD
expected_eta = NOW_SECONDS + web.MIN_FEED_POLL_PERIOD.total_seconds()
self.assert_task(mock_create_task, 'poll-feed', '/queue/poll-feed',
domain='user.com', eta_seconds=expected_eta)
def _test_verify(self, redirects, hcard, actor, redirects_error=None):
self.user.has_redirects = False
self.user.put()

50
web.py
Wyświetl plik

@ -102,6 +102,8 @@ class Web(User, Protocol):
last_webmention_in = ndb.DateTimeProperty(tzinfo=timezone.utc)
last_polled_feed = ndb.DateTimeProperty(tzinfo=timezone.utc)
feed_last_item = ndb.StringProperty() # id (URL)
feed_etag = ndb.StringProperty()
feed_last_modified = ndb.StringProperty()
# Originally, BF served Web users' AP actor ids on fed.brid.gy, eg
# https://fed.brid.gy/snarfed.org . When we started adding new protocols, we
@ -642,25 +644,35 @@ def poll_feed_task():
return msg
# fetch feed
resp = util.requests_get(url)
content_type = resp.headers.get('Content-Type') or ''
type = FEED_TYPES.get(content_type.split(';')[0])
if type == 'atom' or (type == 'xml' and rel_type == 'atom'):
try:
activities = atom.atom_to_activities(resp.text)
except ValueError as e:
error(f"Couldn't parse feed as Atom: {e}", status=502)
obj_feed_prop = {'atom': resp.text}
elif type == 'rss' or (type == 'xml' and rel_type == 'rss'):
try:
activities = rss.to_activities(resp.text)
except ValueError as e:
error(f"Couldn't parse feed as RSS: {e}", status=502)
obj_feed_prop = {'rss': resp.text}
headers = {}
if user.feed_etag:
headers['If-None-Match'] = user.feed_etag
if user.feed_last_modified:
headers['If-Modified-Since'] = user.feed_last_modified
resp = util.requests_get(url, headers=headers)
if resp.status_code == 304:
logger.info('Feed is unchanged since last poll')
activities = []
else:
msg = f'Unknown feed type {content_type}'
logger.info(msg)
return msg
content_type = resp.headers.get('Content-Type') or ''
type = FEED_TYPES.get(content_type.split(';')[0])
if type == 'atom' or (type == 'xml' and rel_type == 'atom'):
try:
activities = atom.atom_to_activities(resp.text)
except ValueError as e:
error(f"Couldn't parse feed as Atom: {e}", status=502)
obj_feed_prop = {'atom': resp.text}
elif type == 'rss' or (type == 'xml' and rel_type == 'rss'):
try:
activities = rss.to_activities(resp.text)
except ValueError as e:
error(f"Couldn't parse feed as RSS: {e}", status=502)
obj_feed_prop = {'rss': resp.text}
else:
msg = f'Unknown feed type {content_type}'
logger.info(msg)
return msg
# create Objects and receive tasks
for i, activity in enumerate(activities):
@ -720,6 +732,8 @@ def poll_feed_task():
# update user
user.last_polled_feed = util.now()
user.feed_etag = resp.headers.get('ETag')
user.feed_last_modified = resp.headers.get('Last-Modified')
user.put()
return 'OK'