kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Better handling of EmptyReply exception, always bump 'last_checked' in the case of an error (#354)
* Better handling of EmptyReply exception, always bump 'last_checked' in the case of an error, adds testpull/359/head
rodzic
f166ab1e30
commit
f87f7077a6
|
@ -9,6 +9,12 @@ import urllib3.exceptions
|
||||||
|
|
||||||
|
|
||||||
class EmptyReply(Exception):
|
class EmptyReply(Exception):
|
||||||
|
def __init__(self, status_code, url):
|
||||||
|
# Set this so we can use it in other parts of the app
|
||||||
|
self.status_code = status_code
|
||||||
|
self.url = url
|
||||||
|
return
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class Fetcher():
|
class Fetcher():
|
||||||
|
@ -110,6 +116,8 @@ class html_webdriver(Fetcher):
|
||||||
|
|
||||||
# @todo - how to check this? is it possible?
|
# @todo - how to check this? is it possible?
|
||||||
self.status_code = 200
|
self.status_code = 200
|
||||||
|
# @todo somehow we should try to get this working for WebDriver
|
||||||
|
# raise EmptyReply(url=url, status_code=r.status_code)
|
||||||
|
|
||||||
# @todo - dom wait loaded?
|
# @todo - dom wait loaded?
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
@ -151,10 +159,10 @@ class html_requests(Fetcher):
|
||||||
# Return bytes here
|
# Return bytes here
|
||||||
html = r.text
|
html = r.text
|
||||||
|
|
||||||
|
|
||||||
# @todo test this
|
# @todo test this
|
||||||
|
# @todo maybe you really want to test zero-byte return pages?
|
||||||
if not r or not html or not len(html):
|
if not r or not html or not len(html):
|
||||||
raise EmptyReply(url)
|
raise EmptyReply(url=url, status_code=r.status_code)
|
||||||
|
|
||||||
self.status_code = r.status_code
|
self.status_code = r.status_code
|
||||||
self.content = html
|
self.content = html
|
||||||
|
|
|
@ -58,9 +58,7 @@ class perform_site_check():
|
||||||
|
|
||||||
watch = self.datastore.data['watching'][uuid]
|
watch = self.datastore.data['watching'][uuid]
|
||||||
|
|
||||||
update_obj = {
|
update_obj = {}
|
||||||
"last_checked": timestamp
|
|
||||||
}
|
|
||||||
|
|
||||||
extra_headers = self.datastore.get_val(uuid, 'headers')
|
extra_headers = self.datastore.get_val(uuid, 'headers')
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,6 @@ def css_filter(css_filter, html_content):
|
||||||
|
|
||||||
return html_block + "\n"
|
return html_block + "\n"
|
||||||
|
|
||||||
|
|
||||||
# Extract/find element
|
# Extract/find element
|
||||||
def extract_element(find='title', html_content=''):
|
def extract_element(find='title', html_content=''):
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from . util import live_server_setup
|
||||||
|
|
||||||
|
from ..html_tools import *
|
||||||
|
|
||||||
|
def test_setup(live_server):
|
||||||
|
live_server_setup(live_server)
|
||||||
|
|
||||||
|
|
||||||
|
def test_error_handler(client, live_server):
|
||||||
|
|
||||||
|
|
||||||
|
# Give the endpoint time to spin up
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
# Add our URL to the import page
|
||||||
|
test_url = url_for('test_endpoint_403_error', _external=True)
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"),
|
||||||
|
data={"urls": test_url},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
|
# Trigger a check
|
||||||
|
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# Give the thread time to pick it up
|
||||||
|
time.sleep(3)
|
||||||
|
|
||||||
|
|
||||||
|
res = client.get(url_for("index"))
|
||||||
|
assert b'unviewed' not in res.data
|
||||||
|
assert b'Status Code 403' in res.data
|
||||||
|
assert bytes("just now".encode('utf-8')) in res.data
|
|
@ -54,6 +54,13 @@ def live_server_setup(live_server):
|
||||||
resp.headers['Content-Type'] = 'application/json'
|
resp.headers['Content-Type'] = 'application/json'
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
|
@live_server.app.route('/test-403')
|
||||||
|
def test_endpoint_403_error():
|
||||||
|
|
||||||
|
from flask import make_response
|
||||||
|
resp = make_response('', 403)
|
||||||
|
return resp
|
||||||
|
|
||||||
# Just return the headers in the request
|
# Just return the headers in the request
|
||||||
@live_server.app.route('/test-headers')
|
@live_server.app.route('/test-headers')
|
||||||
def test_headers():
|
def test_headers():
|
||||||
|
|
|
@ -39,9 +39,10 @@ class update_worker(threading.Thread):
|
||||||
changed_detected = False
|
changed_detected = False
|
||||||
contents = ""
|
contents = ""
|
||||||
update_obj= {}
|
update_obj= {}
|
||||||
|
now = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
now = time.time()
|
|
||||||
changed_detected, update_obj, contents = update_handler.run(uuid)
|
changed_detected, update_obj, contents = update_handler.run(uuid)
|
||||||
|
|
||||||
# Re #342
|
# Re #342
|
||||||
|
@ -51,14 +52,13 @@ class update_worker(threading.Thread):
|
||||||
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
||||||
|
|
||||||
|
|
||||||
# Always record that we atleast tried
|
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3)})
|
|
||||||
|
|
||||||
except PermissionError as e:
|
except PermissionError as e:
|
||||||
self.app.logger.error("File permission error updating", uuid, str(e))
|
self.app.logger.error("File permission error updating", uuid, str(e))
|
||||||
except content_fetcher.EmptyReply as e:
|
except content_fetcher.EmptyReply as e:
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error':str(e)})
|
# Some kind of custom to-str handler in the exception handler that does this?
|
||||||
|
err_text = "EmptyReply: Status Code {}".format(e.status_code)
|
||||||
|
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||||
|
'last_check_status': e.status_code})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.app.logger.error("Exception reached processing watch UUID:%s - %s", uuid, str(e))
|
self.app.logger.error("Exception reached processing watch UUID:%s - %s", uuid, str(e))
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||||
|
@ -66,13 +66,14 @@ class update_worker(threading.Thread):
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
watch = self.datastore.data['watching'][uuid]
|
watch = self.datastore.data['watching'][uuid]
|
||||||
|
fname = "" # Saved history text filename
|
||||||
|
|
||||||
# For the FIRST time we check a site, or a change detected, save the snapshot.
|
# For the FIRST time we check a site, or a change detected, save the snapshot.
|
||||||
if changed_detected or not watch['last_checked']:
|
if changed_detected or not watch['last_checked']:
|
||||||
# A change was detected
|
# A change was detected
|
||||||
fname = self.datastore.save_history_text(watch_uuid=uuid, contents=contents)
|
fname = self.datastore.save_history_text(watch_uuid=uuid, contents=contents)
|
||||||
# Should always be keyed by string(timestamp)
|
# Should always be keyed by string(timestamp)
|
||||||
self.datastore.update_watch(uuid, {"history": {str(update_obj["last_checked"]): fname}})
|
self.datastore.update_watch(uuid, {"history": {str(round(time.time())): fname}})
|
||||||
|
|
||||||
# Generally update anything interesting returned
|
# Generally update anything interesting returned
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||||
|
@ -136,6 +137,11 @@ class update_worker(threading.Thread):
|
||||||
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
|
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
|
||||||
print("!!!! Exception in update_worker !!!\n", e)
|
print("!!!! Exception in update_worker !!!\n", e)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Always record that we atleast tried
|
||||||
|
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
|
||||||
|
'last_checked': round(time.time())})
|
||||||
|
|
||||||
self.current_uuid = None # Done
|
self.current_uuid = None # Done
|
||||||
self.q.task_done()
|
self.q.task_done()
|
||||||
|
|
||||||
|
|
Ładowanie…
Reference in New Issue