diff --git a/tests/test_web.py b/tests/test_web.py index 0e93d74..4b002eb 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -1783,8 +1783,11 @@ class WebTest(TestCase): I hereby ☕ post """ - mock_get.return_value = requests_response( - feed, headers={'Content-Type': atom.CONTENT_TYPE}) + mock_get.return_value = requests_response(feed, headers={ + 'Content-Type': atom.CONTENT_TYPE, + 'Last-Modified': 'Sat, 01 Jan 2024 01:02:03 GMT', + 'ETag': '"abc123"', + }) got = self.post('/queue/poll-feed', data={'domain': 'user.com'}) self.assertEqual(200, got.status_code) @@ -1792,6 +1795,8 @@ class WebTest(TestCase): user = self.user.key.get() self.assertEqual(NOW, user.last_polled_feed) self.assertEqual('https://user.com/post', user.feed_last_item) + self.assertEqual('"abc123"', user.feed_etag) + self.assertEqual('Sat, 01 Jan 2024 01:02:03 GMT', user.feed_last_modified) mock_get.assert_has_calls(( self.req('https://foo/feed'), @@ -2074,6 +2079,39 @@ class WebTest(TestCase): mock_create_task.assert_not_called() mock_get.assert_not_called() + + @patch('oauth_dropins.webutil.appengine_config.tasks_client.create_task') + def test_poll_feed_etag_last_modified(self, mock_create_task, mock_get, _): + common.RUN_TASKS_INLINE = False + self.user.obj.mf2 = ACTOR_MF2_REL_FEED_URL + self.user.obj.put() + + self.user.feed_etag = '"abc123"' + self.user.feed_last_modified ='Sat, 01 Jan 2024 01:02:03 GMT' + self.user.put() + + mock_get.return_value = requests_response('', status=304, headers={ + 'Last-Modified': 'Sat, 99 Jan 2024 01:02:03 GMT', + 'ETag': '"def789"', + }) + + got = self.post('/queue/poll-feed', data={'domain': 'user.com'}) + self.assertEqual(200, got.status_code) + + user = self.user.key.get() + self.assertEqual(NOW, user.last_polled_feed) + self.assertEqual('"def789"', user.feed_etag) + self.assertEqual('Sat, 99 Jan 2024 01:02:03 GMT', user.feed_last_modified) + + mock_get.assert_has_calls([self.req('https://foo/feed', headers={ + 'If-None-Match': '"abc123"', + 'If-Modified-Since': 'Sat, 01 Jan 2024 01:02:03 GMT', + })]) + + expected_eta = NOW_SECONDS + web.MIN_FEED_POLL_PERIOD.total_seconds() + self.assert_task(mock_create_task, 'poll-feed', '/queue/poll-feed', + domain='user.com', eta_seconds=expected_eta) + def _test_verify(self, redirects, hcard, actor, redirects_error=None): self.user.has_redirects = False self.user.put() diff --git a/web.py b/web.py index 2bc287d..6b7cec2 100644 --- a/web.py +++ b/web.py @@ -102,6 +102,8 @@ class Web(User, Protocol): last_webmention_in = ndb.DateTimeProperty(tzinfo=timezone.utc) last_polled_feed = ndb.DateTimeProperty(tzinfo=timezone.utc) feed_last_item = ndb.StringProperty() # id (URL) + feed_etag = ndb.StringProperty() + feed_last_modified = ndb.StringProperty() # Originally, BF served Web users' AP actor ids on fed.brid.gy, eg # https://fed.brid.gy/snarfed.org . When we started adding new protocols, we @@ -642,25 +644,35 @@ def poll_feed_task(): return msg # fetch feed - resp = util.requests_get(url) - content_type = resp.headers.get('Content-Type') or '' - type = FEED_TYPES.get(content_type.split(';')[0]) - if type == 'atom' or (type == 'xml' and rel_type == 'atom'): - try: - activities = atom.atom_to_activities(resp.text) - except ValueError as e: - error(f"Couldn't parse feed as Atom: {e}", status=502) - obj_feed_prop = {'atom': resp.text} - elif type == 'rss' or (type == 'xml' and rel_type == 'rss'): - try: - activities = rss.to_activities(resp.text) - except ValueError as e: - error(f"Couldn't parse feed as RSS: {e}", status=502) - obj_feed_prop = {'rss': resp.text} + headers = {} + if user.feed_etag: + headers['If-None-Match'] = user.feed_etag + if user.feed_last_modified: + headers['If-Modified-Since'] = user.feed_last_modified + resp = util.requests_get(url, headers=headers) + + if resp.status_code == 304: + logger.info('Feed is unchanged since last poll') + activities = [] else: - msg = f'Unknown feed type {content_type}' - logger.info(msg) - return msg + content_type = resp.headers.get('Content-Type') or '' + type = FEED_TYPES.get(content_type.split(';')[0]) + if type == 'atom' or (type == 'xml' and rel_type == 'atom'): + try: + activities = atom.atom_to_activities(resp.text) + except ValueError as e: + error(f"Couldn't parse feed as Atom: {e}", status=502) + obj_feed_prop = {'atom': resp.text} + elif type == 'rss' or (type == 'xml' and rel_type == 'rss'): + try: + activities = rss.to_activities(resp.text) + except ValueError as e: + error(f"Couldn't parse feed as RSS: {e}", status=502) + obj_feed_prop = {'rss': resp.text} + else: + msg = f'Unknown feed type {content_type}' + logger.info(msg) + return msg # create Objects and receive tasks for i, activity in enumerate(activities): @@ -720,6 +732,8 @@ def poll_feed_task(): # update user user.last_polled_feed = util.now() + user.feed_etag = resp.headers.get('ETag') + user.feed_last_modified = resp.headers.get('Last-Modified') user.put() return 'OK'