changedetection.io/backend/store.py

148 wiersze
5.1 KiB
Python
Czysty Zwykły widok Historia

2021-01-27 11:39:46 +00:00
import json
import uuid as uuid_builder
2021-01-27 12:17:35 +00:00
import validators
2021-01-27 11:39:46 +00:00
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
# Open a github issue if you know something :)
2021-01-27 12:17:35 +00:00
# https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
2021-01-27 11:39:46 +00:00
class ChangeDetectionStore:
    """In-memory store of watched URLs and global settings, mirrored to JSON.

    The store keeps one dict with three top-level keys: ``note``,
    ``watching`` (watch uuid -> watch dict) and ``settings``.  Every method
    that mutates a watch sets ``needs_write`` to ``True``; ``sync_to_json()``
    writes the dict to disk and clears the flag again.
    """

    def __init__(self, datastore_path='/datastore/url-watches.json'):
        """Load the store from ``datastore_path`` or seed it with defaults.

        Args:
            datastore_path: Location of the JSON file.  Defaults to the path
                that was previously hard-coded, so existing callers that pass
                no argument behave as before.
        """
        self.needs_write = False
        self.json_store_path = datastore_path

        self.__data = {
            'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
            'watching': {},
            'settings': {
                'headers': {
                    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                    'Accept-Encoding': 'gzip, deflate, br',
                    'Accept-Language': 'en-GB,en-US;q=0.9,en;'
                },
                'requests': {
                    'timeout': 15,  # Default 15 seconds
                    'minutes_between_check': 3 * 60  # Default 3 hours
                }
            }
        }

        # Base definition for all watchers; every watch starts as a copy of it.
        self.generic_definition = {
            'url': None,
            'tag': None,
            'last_checked': 0,
            'last_changed': 0,
            'title': None,
            'previous_md5': None,
            'uuid': str(uuid_builder.uuid4()),
            'headers': {},  # Extra headers to send
            'history': {}  # Dict of timestamp and output stripped filename
        }

        # Only the read/parse step can raise the exceptions handled here, so
        # keep the try body limited to it.
        try:
            with open(self.json_store_path, encoding='utf-8') as json_file:
                from_disk = json.load(json_file)
        except (FileNotFoundError, json.decoder.JSONDecodeError):
            # First run (or unreadable file): start with a few example watches.
            print("Creating JSON store")
            self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
            self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
            self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid')
            self.add_watch(url='https://changedetection.io', tag='Tech news')
        else:
            self._merge_from_disk(from_disk)

    def _blank_watch(self):
        """Return an independent deep copy of ``generic_definition``."""
        # A shallow copy would make all watches share the same nested
        # 'headers' and 'history' dict objects.
        from copy import deepcopy
        return deepcopy(self.generic_definition)

    def _merge_from_disk(self, from_disk):
        """Overlay the parsed JSON file contents on top of the defaults."""
        # @todo a recursive dict update would avoid listing sub-structures.
        # Merging section by section keeps defaults the file does not mention.
        if 'watching' in from_disk:
            self.__data['watching'].update(from_disk['watching'])

        if 'settings' in from_disk:
            for section in ('headers', 'requests'):
                if section in from_disk['settings']:
                    self.__data['settings'][section].update(from_disk['settings'][section])

        # Re-initialise each watch from the generic definition so that keys
        # added to the definition later also exist on watches saved earlier.
        for uuid, watch in list(self.__data['watching'].items()):
            _blank = self._blank_watch()
            # Fall back to the dict key rather than the template's uuid when
            # the stored watch carries no 'uuid' of its own.
            _blank['uuid'] = uuid
            _blank.update(watch)
            self.__data['watching'][uuid] = _blank
            print("Watching:", uuid, _blank['url'])

    def update_watch(self, uuid, val, var):
        """Set key ``val`` of watch ``uuid`` to ``var`` and mark the store dirty.

        Raises:
            KeyError: If no watch with that uuid exists.
        """
        self.__data['watching'][uuid][val] = var
        self.needs_write = True

    @property
    def data(self):
        """The complete underlying store dict (not a copy)."""
        return self.__data

    def get_all_tags(self):
        """Return the sorted list of distinct tags used by all watches.

        A watch's ``tag`` may hold several comma separated tags; each one is
        stripped of surrounding whitespace.  Watches whose tag is ``None``
        (the generic default) contribute nothing.
        """
        tags = set()
        for watch in self.data['watching'].values():
            raw = watch.get('tag')
            if raw is None:
                continue
            # Support for comma separated list of tags.
            tags.update(tag.strip() for tag in raw.split(','))
        return sorted(tags)

    def delete(self, uuid):
        """Remove watch ``uuid`` and mark the store dirty.

        Raises:
            KeyError: If no watch with that uuid exists.
        """
        del self.__data['watching'][uuid]
        self.needs_write = True

    def url_exists(self, url):
        """Return ``True`` if any watch has exactly this ``url``."""
        # Iterate the watch dicts; iterating the mapping itself yields only
        # the uuid keys.
        return any(watch.get('url') == url for watch in self.data['watching'].values())

    def get_val(self, uuid, val):
        """Return key ``val`` of watch ``uuid``, or ``None`` if the key is unset.

        Raises:
            KeyError: If no watch with that uuid exists.
        """
        return self.data['watching'][uuid].get(val)

    def add_watch(self, url, tag):
        """Register a new watch for ``url`` under a freshly generated uuid.

        Args:
            url: Address to watch.
            tag: Tag string for the watch (may be comma separated).
        """
        # NOTE(review): the result of validators.url() is discarded here. The
        # library is documented as returning a falsy failure object instead of
        # raising, so an invalid URL would still be stored. Confirm and decide
        # whether invalid URLs should be rejected.
        validators.url(url)

        _blank = self._blank_watch()
        _blank.update({
            'url': url,
            'tag': tag,
            'uuid': str(uuid_builder.uuid4())
        })
        self.data['watching'][_blank['uuid']] = _blank
        # Same as the other mutators: flag the new watch for persisting.
        self.needs_write = True

    def sync_to_json(self):
        """Write the whole store to the JSON file and clear ``needs_write``."""
        print("Saving index")
        with open(self.json_store_path, 'w', encoding='utf-8') as json_file:
            json.dump(self.data, json_file, indent=4)
        self.needs_write = False
2021-01-27 11:39:46 +00:00
2021-01-27 18:45:28 +00:00
# body of the constructor