kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Better handling of EmptyReply exception, always bump 'last_checked' in the case of an error (#354)
* Better handling of EmptyReply exception, always bump 'last_checked' in the case of an error, adds testpull/359/head
rodzic
f166ab1e30
commit
f87f7077a6
|
@ -9,6 +9,12 @@ import urllib3.exceptions
|
|||
|
||||
|
||||
class EmptyReply(Exception):
|
||||
def __init__(self, status_code, url):
|
||||
# Set this so we can use it in other parts of the app
|
||||
self.status_code = status_code
|
||||
self.url = url
|
||||
return
|
||||
|
||||
pass
|
||||
|
||||
class Fetcher():
|
||||
|
@ -110,6 +116,8 @@ class html_webdriver(Fetcher):
|
|||
|
||||
# @todo - how to check this? is it possible?
|
||||
self.status_code = 200
|
||||
# @todo somehow we should try to get this working for WebDriver
|
||||
# raise EmptyReply(url=url, status_code=r.status_code)
|
||||
|
||||
# @todo - dom wait loaded?
|
||||
time.sleep(5)
|
||||
|
@ -151,10 +159,10 @@ class html_requests(Fetcher):
|
|||
# Return bytes here
|
||||
html = r.text
|
||||
|
||||
|
||||
# @todo test this
|
||||
# @todo maybe you really want to test zero-byte return pages?
|
||||
if not r or not html or not len(html):
|
||||
raise EmptyReply(url)
|
||||
raise EmptyReply(url=url, status_code=r.status_code)
|
||||
|
||||
self.status_code = r.status_code
|
||||
self.content = html
|
||||
|
|
|
@ -58,9 +58,7 @@ class perform_site_check():
|
|||
|
||||
watch = self.datastore.data['watching'][uuid]
|
||||
|
||||
update_obj = {
|
||||
"last_checked": timestamp
|
||||
}
|
||||
update_obj = {}
|
||||
|
||||
extra_headers = self.datastore.get_val(uuid, 'headers')
|
||||
|
||||
|
|
|
@ -16,7 +16,6 @@ def css_filter(css_filter, html_content):
|
|||
|
||||
return html_block + "\n"
|
||||
|
||||
|
||||
# Extract/find element
|
||||
def extract_element(find='title', html_content=''):
|
||||
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from . util import live_server_setup
|
||||
|
||||
from ..html_tools import *
|
||||
|
||||
def test_setup(live_server):
|
||||
live_server_setup(live_server)
|
||||
|
||||
|
||||
def test_error_handler(client, live_server):
|
||||
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint_403_error', _external=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(3)
|
||||
|
||||
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' not in res.data
|
||||
assert b'Status Code 403' in res.data
|
||||
assert bytes("just now".encode('utf-8')) in res.data
|
|
@ -54,6 +54,13 @@ def live_server_setup(live_server):
|
|||
resp.headers['Content-Type'] = 'application/json'
|
||||
return resp
|
||||
|
||||
@live_server.app.route('/test-403')
|
||||
def test_endpoint_403_error():
|
||||
|
||||
from flask import make_response
|
||||
resp = make_response('', 403)
|
||||
return resp
|
||||
|
||||
# Just return the headers in the request
|
||||
@live_server.app.route('/test-headers')
|
||||
def test_headers():
|
||||
|
|
|
@ -39,9 +39,10 @@ class update_worker(threading.Thread):
|
|||
changed_detected = False
|
||||
contents = ""
|
||||
update_obj= {}
|
||||
now = time.time()
|
||||
|
||||
try:
|
||||
now = time.time()
|
||||
|
||||
changed_detected, update_obj, contents = update_handler.run(uuid)
|
||||
|
||||
# Re #342
|
||||
|
@ -51,14 +52,13 @@ class update_worker(threading.Thread):
|
|||
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
||||
|
||||
|
||||
# Always record that we atleast tried
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3)})
|
||||
|
||||
except PermissionError as e:
|
||||
self.app.logger.error("File permission error updating", uuid, str(e))
|
||||
except content_fetcher.EmptyReply as e:
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error':str(e)})
|
||||
|
||||
# Some kind of custom to-str handler in the exception handler that does this?
|
||||
err_text = "EmptyReply: Status Code {}".format(e.status_code)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except Exception as e:
|
||||
self.app.logger.error("Exception reached processing watch UUID:%s - %s", uuid, str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
|
@ -66,13 +66,14 @@ class update_worker(threading.Thread):
|
|||
else:
|
||||
try:
|
||||
watch = self.datastore.data['watching'][uuid]
|
||||
fname = "" # Saved history text filename
|
||||
|
||||
# For the FIRST time we check a site, or a change detected, save the snapshot.
|
||||
if changed_detected or not watch['last_checked']:
|
||||
# A change was detected
|
||||
fname = self.datastore.save_history_text(watch_uuid=uuid, contents=contents)
|
||||
# Should always be keyed by string(timestamp)
|
||||
self.datastore.update_watch(uuid, {"history": {str(update_obj["last_checked"]): fname}})
|
||||
self.datastore.update_watch(uuid, {"history": {str(round(time.time())): fname}})
|
||||
|
||||
# Generally update anything interesting returned
|
||||
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
|
@ -136,6 +137,11 @@ class update_worker(threading.Thread):
|
|||
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
|
||||
print("!!!! Exception in update_worker !!!\n", e)
|
||||
|
||||
finally:
|
||||
# Always record that we atleast tried
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
|
||||
'last_checked': round(time.time())})
|
||||
|
||||
self.current_uuid = None # Done
|
||||
self.q.task_done()
|
||||
|
||||
|
|
Ładowanie…
Reference in New Issue