kopia lustrzana https://github.com/dgtlmoon/changedetection.io
* Re #342 - check for accidental python byte encoding of non-utf8/string, check return type of fetcher and fix encoding of notification content
pull/337/head
rodzic
d4dc3466dc
commit
489671dcca
|
|
@ -14,7 +14,7 @@ class EmptyReply(Exception):
|
|||
class Fetcher():
|
||||
error = None
|
||||
status_code = None
|
||||
content = None # Should be bytes?
|
||||
content = None # Should always be bytes.
|
||||
|
||||
fetcher_description ="No description"
|
||||
|
||||
|
|
@ -129,7 +129,6 @@ class html_webdriver(Fetcher):
|
|||
# driver.quit() seems to cause better exceptions
|
||||
driver.quit()
|
||||
|
||||
|
||||
return True
|
||||
|
||||
# "html_requests" is listed as the default fetcher in store.py!
|
||||
|
|
@ -146,6 +145,8 @@ class html_requests(Fetcher):
|
|||
timeout=timeout,
|
||||
verify=False)
|
||||
|
||||
# https://stackoverflow.com/questions/44203397/python-requests-get-returns-improperly-decoded-text-instead-of-utf-8
|
||||
# Return bytes here
|
||||
html = r.text
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -367,6 +367,10 @@ class ChangeDetectionStore:
|
|||
import uuid
|
||||
|
||||
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
|
||||
# In case the operator deleted it, check and create.
|
||||
if not os.path.isdir(output_path):
|
||||
mkdir(output_path)
|
||||
|
||||
fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(contents)
|
||||
|
|
|
|||
|
|
@ -159,6 +159,9 @@ def test_check_notification(client, live_server):
|
|||
|
||||
with open("test-datastore/notification.txt", "r") as f:
|
||||
notification_submission = f.read()
|
||||
print ("Notification submission was:", notification_submission)
|
||||
# Re #342 - check for accidental python byte encoding of non-utf8/string
|
||||
assert "b'" not in notification_submission
|
||||
|
||||
assert re.search('Watch UUID: [0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}', notification_submission, re.IGNORECASE)
|
||||
assert "Watch title: my title" in notification_submission
|
||||
|
|
|
|||
|
|
@ -2,7 +2,12 @@ import threading
|
|||
import queue
|
||||
import time
|
||||
|
||||
# Requests for checking on the site use a pool of thread Workers managed by a Queue.
|
||||
# A single update worker
|
||||
#
|
||||
# Requests for checking on a single site(watch) from a queue of watches
|
||||
# (another process inserts watches into the queue that are time-ready for checking)
|
||||
|
||||
|
||||
class update_worker(threading.Thread):
|
||||
current_uuid = None
|
||||
|
||||
|
|
@ -39,6 +44,13 @@ class update_worker(threading.Thread):
|
|||
now = time.time()
|
||||
changed_detected, update_obj, contents = update_handler.run(uuid)
|
||||
|
||||
# Re #342
|
||||
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
|
||||
# We then convert/.decode('utf-8') for the notification etc
|
||||
if not isinstance(contents, (bytes, bytearray)):
|
||||
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
||||
|
||||
|
||||
# Always record that we at least tried
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3)})
|
||||
|
||||
|
|
@ -111,7 +123,7 @@ class update_worker(threading.Thread):
|
|||
n_object.update({
|
||||
'watch_url': watch['url'],
|
||||
'uuid': uuid,
|
||||
'current_snapshot': str(contents),
|
||||
'current_snapshot': contents.decode('utf-8'),
|
||||
'diff_full': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep),
|
||||
'diff': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep)
|
||||
})
|
||||
|
|
|
|||
Ładowanie…
Reference in New Issue