kopia lustrzana https://github.com/snarfed/bridgy-fed
web.poll_feed_task: handle more HTTP and parse errors
fixes https://console.cloud.google.com/errors/detail/CNj81tKbzNSzwAE;time=P30D?project=bridgy-federated
pull/892/head
rodzic
db9aaa927e
commit
ac4b7fb4e5
|
@ -2004,7 +2004,7 @@ class WebTest(TestCase):
|
||||||
mock_get.side_effect = requests.ConnectionError()
|
mock_get.side_effect = requests.ConnectionError()
|
||||||
|
|
||||||
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
|
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
|
||||||
self.assertEqual(504, got.status_code)
|
self.assertEqual(502, got.status_code)
|
||||||
self.assertIsNone(self.user.key.get().last_polled_feed)
|
self.assertIsNone(self.user.key.get().last_polled_feed)
|
||||||
|
|
||||||
def test_poll_feed_unsupported_content_types(self, mock_get, _):
|
def test_poll_feed_unsupported_content_types(self, mock_get, _):
|
||||||
|
@ -2030,13 +2030,10 @@ class WebTest(TestCase):
|
||||||
self.assertEqual(502, got.status_code)
|
self.assertEqual(502, got.status_code)
|
||||||
self.assertIsNone(self.user.key.get().last_polled_feed)
|
self.assertIsNone(self.user.key.get().last_polled_feed)
|
||||||
|
|
||||||
@patch('oauth_dropins.webutil.appengine_config.tasks_client.create_task')
|
def test_poll_feed_parse_error(self, mock_get, _):
|
||||||
def test_poll_feed_user_feed_last_item(self, mock_create_task, mock_get, _):
|
|
||||||
common.RUN_TASKS_INLINE = False
|
common.RUN_TASKS_INLINE = False
|
||||||
self.user.obj.mf2 = ACTOR_MF2_REL_FEED_URL
|
self.user.obj.mf2 = ACTOR_MF2_REL_FEED_URL
|
||||||
self.user.obj.put()
|
self.user.obj.put()
|
||||||
self.user.feed_last_item = 'https://user.com/post'
|
|
||||||
self.user.put()
|
|
||||||
|
|
||||||
feed = """\
|
feed = """\
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
@ -2047,11 +2044,34 @@ class WebTest(TestCase):
|
||||||
"""
|
"""
|
||||||
mock_get.return_value = requests_response(
|
mock_get.return_value = requests_response(
|
||||||
feed, headers={'Content-Type': atom.CONTENT_TYPE})
|
feed, headers={'Content-Type': atom.CONTENT_TYPE})
|
||||||
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
|
|
||||||
self.assertEqual(200, got.status_code)
|
|
||||||
|
|
||||||
|
for content_type in None, 'text/plain':
|
||||||
|
mock_get.return_value = requests_response(
|
||||||
|
'nope', headers={'Content-Type': content_type})
|
||||||
|
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
|
||||||
|
self.assertEqual(200, got.status_code)
|
||||||
|
self.assertIsNone(self.user.key.get().last_polled_feed)
|
||||||
|
|
||||||
|
@patch('oauth_dropins.webutil.appengine_config.tasks_client.create_task')
|
||||||
|
def test_poll_feed_user_feed_last_item(self, mock_create_task, mock_get, _):
|
||||||
|
common.RUN_TASKS_INLINE = False
|
||||||
|
self.user.obj.mf2 = ACTOR_MF2_REL_FEED_URL
|
||||||
|
self.user.obj.put()
|
||||||
|
|
||||||
|
# bad unescaped & char in title, raises xml.etree.ElementTree.ParseError
|
||||||
|
feed = """\
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<title>Xerox 820 & CP/M</title>
|
||||||
|
</entry>
|
||||||
|
"""
|
||||||
|
mock_get.return_value = requests_response(
|
||||||
|
feed, headers={'Content-Type': atom.CONTENT_TYPE})
|
||||||
|
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
|
||||||
|
|
||||||
|
self.assertEqual(502, got.status_code)
|
||||||
self.assertEqual(1, Object.query().count()) # only user profile
|
self.assertEqual(1, Object.query().count()) # only user profile
|
||||||
mock_create_task.assert_called_once() # only the next poll-feed task
|
mock_create_task.assert_not_called() # doesn't create a next poll-feed task
|
||||||
|
|
||||||
@patch('oauth_dropins.webutil.appengine_config.tasks_client.create_task')
|
@patch('oauth_dropins.webutil.appengine_config.tasks_client.create_task')
|
||||||
def test_poll_feed_blocklisted_entry_url(self, mock_create_task, mock_get, _):
|
def test_poll_feed_blocklisted_entry_url(self, mock_create_task, mock_get, _):
|
||||||
|
|
6
web.py
6
web.py
|
@ -6,6 +6,7 @@ import re
|
||||||
import statistics
|
import statistics
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from urllib.parse import quote, urlencode, urljoin, urlparse
|
from urllib.parse import quote, urlencode, urljoin, urlparse
|
||||||
|
from xml.etree import ElementTree
|
||||||
|
|
||||||
from flask import g, redirect, render_template, request
|
from flask import g, redirect, render_template, request
|
||||||
from google.cloud import ndb
|
from google.cloud import ndb
|
||||||
|
@ -658,7 +659,7 @@ def poll_feed_task():
|
||||||
headers['If-None-Match'] = user.feed_etag
|
headers['If-None-Match'] = user.feed_etag
|
||||||
if user.feed_last_modified:
|
if user.feed_last_modified:
|
||||||
headers['If-Modified-Since'] = user.feed_last_modified
|
headers['If-Modified-Since'] = user.feed_last_modified
|
||||||
resp = util.requests_get(url, headers=headers)
|
resp = util.requests_get(url, headers=headers, gateway=True)
|
||||||
|
|
||||||
content_type = resp.headers.get('Content-Type') or ''
|
content_type = resp.headers.get('Content-Type') or ''
|
||||||
type = FEED_TYPES.get(content_type.split(';')[0])
|
type = FEED_TYPES.get(content_type.split(';')[0])
|
||||||
|
@ -668,7 +669,8 @@ def poll_feed_task():
|
||||||
elif type == 'atom' or (type == 'xml' and rel_type == 'atom'):
|
elif type == 'atom' or (type == 'xml' and rel_type == 'atom'):
|
||||||
try:
|
try:
|
||||||
activities = atom.atom_to_activities(resp.text)
|
activities = atom.atom_to_activities(resp.text)
|
||||||
except ValueError as e:
|
except (ValueError, ElementTree.ParseError) as e:
|
||||||
|
# TODO: should probably still create the next poll-feed task
|
||||||
error(f"Couldn't parse feed as Atom: {e}", status=502)
|
error(f"Couldn't parse feed as Atom: {e}", status=502)
|
||||||
obj_feed_prop = {'atom': resp.text}
|
obj_feed_prop = {'atom': resp.text}
|
||||||
elif type == 'rss' or (type == 'xml' and rel_type == 'rss'):
|
elif type == 'rss' or (type == 'xml' and rel_type == 'rss'):
|
||||||
|
|
Ładowanie…
Reference in New Issue