diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 5648cb53..167e1ffb 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -810,7 +810,7 @@ def changedetection_app(config=None, datastore_o=None): return redirect(url_for('index')) - @app.route("/diff/", methods=['GET']) + @app.route("/diff/", methods=['GET', 'POST']) @login_required def diff_history_page(uuid): @@ -818,6 +818,7 @@ def changedetection_app(config=None, datastore_o=None): if uuid == 'first': uuid = list(datastore.data['watching'].keys()).pop() + extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] try: watch = datastore.data['watching'][uuid] @@ -825,6 +826,23 @@ def changedetection_app(config=None, datastore_o=None): flash("No history found for the specified link, bad link?", "error") return redirect(url_for('index')) + # For submission of requesting an extract + if request.method == 'POST': + extract_regex = request.form.get('extract_regex').strip() + output = watch.extract_regex_from_all_history(extract_regex) + if output: + watch_dir = os.path.join(datastore_o.datastore_path, uuid) + response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True)) + response.headers['Content-type'] = 'text/csv' + response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' + response.headers['Pragma'] = 'no-cache' + response.headers['Expires'] = 0 + return response + + + flash('Nothing matches that RegEx', 'error') + redirect(url_for('diff_history_page', uuid=uuid)+'#extract') + history = watch.history dates = list(history.keys()) @@ -866,24 +884,28 @@ def changedetection_app(config=None, datastore_o=None): is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or ( watch.get('fetch_backend', None) is None and system_uses_webdriver) else False + from changedetectionio import forms + extract_form = forms.extractDataForm(request.form) + output = 
render_template("diff.html", - watch_a=watch, - newest=newest_version_file_contents, - previous=previous_version_file_contents, - extra_stylesheets=extra_stylesheets, - dark_mode=getDarkModeSetting(), - versions=dates[:-1], # All except current/last - uuid=uuid, - newest_version_timestamp=dates[-1], - current_previous_version=str(previous_version), current_diff_url=watch['url'], + current_previous_version=str(previous_version), + dark_mode=getDarkModeSetting(), + extra_stylesheets=extra_stylesheets, extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']), - left_sticky=True, - screenshot=screenshot_url, + extract_form=extract_form, is_html_webdriver=is_html_webdriver, last_error=watch['last_error'], + last_error_screenshot=watch.get_error_snapshot(), last_error_text=watch.get_error_text(), - last_error_screenshot=watch.get_error_snapshot() + left_sticky=True, + newest=newest_version_file_contents, + newest_version_timestamp=dates[-1], + previous=previous_version_file_contents, + screenshot=screenshot_url, + uuid=uuid, + versions=dates[:-1], # All except current/last + watch_a=watch ) return output diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index e12b6917..e127c5e0 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -448,3 +448,9 @@ class globalSettingsForm(Form): requests = FormField(globalSettingsRequestForm) application = FormField(globalSettingsApplicationForm) save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"}) + + +class extractDataForm(Form): + extract_regex = StringField('RegEx to extract') + extract_submit_button = SubmitField('Extract as CSV', render_kw={"class": "pure-button pure-button-primary"}) + diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index 7c0ad045..c3a000b0 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -318,3 +318,47 @@ class model(dict): if 
def extract_regex_from_all_history(self, regex):
    """Apply ``regex`` to every history snapshot and write the matches to a CSV report.

    Each entry of ``self.history`` is read from disk (entries whose file no
    longer exists are skipped) and searched with ``re.findall`` using
    ``re.MULTILINE``. Every match becomes one CSV row made of the history
    key, that key rendered as a local ``YYYY-MM-DD HH:MM:SS`` date, and then
    the match itself (one column for a plain match, one column per capture
    group when the pattern has several groups).

    The report is written to ``report.csv`` inside ``self.watch_data_dir``
    and is only created once the first match is found.

    Args:
        regex: Regular expression pattern (string) to search for.

    Returns:
        The report file name (``'report.csv'``, relative to
        ``self.watch_data_dir``) when at least one match was written,
        otherwise ``False``.

    Raises:
        re.error: If ``regex`` is not a valid pattern.
    """
    import csv
    import datetime
    import re

    csv_output_filename = False
    csv_writer = None
    # Kept under its own name: the snapshot handle below must never rebind it,
    # otherwise the report would not be the file that gets closed at the end.
    csv_file = None

    try:
        # self.history maps a timestamp key (epoch seconds) to the snapshot's path
        for k, fname in self.history.items():
            if not os.path.isfile(fname):
                continue

            with open(fname, "r") as snapshot:
                contents = snapshot.read()

            res = re.findall(regex, contents, re.MULTILINE)
            if not res:
                continue

            if csv_writer is None:
                # A file on the disk can be transferred much faster via flask than a string reply
                csv_output_filename = 'report.csv'
                # newline='' lets the csv module control line endings itself
                csv_file = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w', newline='')
                csv_writer = csv.writer(csv_file,
                                        delimiter=',',
                                        quotechar='"',
                                        quoting=csv.QUOTE_MINIMAL)
                # @todo column headers for the extracted groups in the future
                csv_writer.writerow(['Epoch seconds', 'Date'])

            date_str = datetime.datetime.fromtimestamp(int(k)).strftime('%Y-%m-%d %H:%M:%S')
            for r in res:
                row = [k, date_str]
                if isinstance(r, str):
                    # Pattern with zero or one capture group: a single value
                    row.append(r)
                else:
                    # Pattern with several capture groups: findall yields a tuple
                    row.extend(r)
                csv_writer.writerow(row)
    finally:
        # Flush and close the report before the caller streams it, even on error
        if csv_file is not None:
            csv_file.close()

    return csv_output_filename
margin-right: 1em; - border-radius: 5px; - font-size: 11px; } + border-radius: 5px; } + #diff-ui #text { + font-size: 11px; } #diff-ui table { table-layout: fixed; width: 100%; } diff --git a/changedetectionio/static/styles/scss/diff.scss b/changedetectionio/static/styles/scss/diff.scss index 7c219f20..19783b6f 100644 --- a/changedetectionio/static/styles/scss/diff.scss +++ b/changedetectionio/static/styles/scss/diff.scss @@ -7,7 +7,11 @@ margin-left: 1em; margin-right: 1em; border-radius: 5px; - font-size: 11px; + + // The first tab 'text' diff + #text { + font-size: 11px; + } table { table-layout: fixed; diff --git a/changedetectionio/templates/diff.html b/changedetectionio/templates/diff.html index d9b9195b..4c5d516b 100644 --- a/changedetectionio/templates/diff.html +++ b/changedetectionio/templates/diff.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} - +{% from '_helpers.jinja' import render_field, render_checkbox_field, render_button %} {% block content %}