2024-10-10 22:19:19 +00:00
import importlib
2023-07-10 14:08:45 +00:00
from concurrent . futures import ThreadPoolExecutor
2024-10-10 22:19:19 +00:00
from changedetectionio . processors . text_json_diff . processor import FilterNotFoundInResponse
2024-02-10 23:09:12 +00:00
from changedetectionio . store import ChangeDetectionStore
2023-07-10 14:08:45 +00:00
from functools import wraps
from flask import Blueprint
from flask_login import login_required
# Numeric status codes. They are not referenced by the code visible in this
# module. NOTE(review): confirm whether anything else imports them.
STATUS_CHECKING, STATUS_FAILED, STATUS_OK = 0, 1, 2

# Upper bound on concurrently running worker threads in the shared pool.
THREADPOOL_MAX_WORKERS = 3

# Shared executor used by `threadpool` whenever no explicit executor is given.
_DEFAULT_POOL = ThreadPoolExecutor(max_workers=THREADPOOL_MAX_WORKERS)
# Maybe use the fetch time, if it's >5, to show some expected load time?
def threadpool(f, executor=None):
    """Decorator that makes ``f`` run on a thread pool.

    Calling the decorated function submits ``f`` (with the same positional and
    keyword arguments) to ``executor`` and returns the future produced by
    ``submit`` instead of ``f``'s own return value. When ``executor`` is falsy
    the module-level ``_DEFAULT_POOL`` is used.
    """

    @wraps(f)
    def run_in_pool(*args, **kwargs):
        # The pool is chosen on every call, not once at decoration time.
        pool = executor if executor else _DEFAULT_POOL
        future = pool.submit(f, *args, **kwargs)
        return future

    return run_in_pool
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the ``check_proxies`` blueprint.

    The blueprint exposes two GET routes, both keyed by a watch UUID:

    * ``/<uuid>/start``  - start one background check per entry in
      ``datastore.proxy_list`` (unless checks for that UUID are still
      running) and return the current per-proxy state.
    * ``/<uuid>/status`` - return the current per-proxy state.

    The per-proxy state is a dict ``{proxy key: status dict}``; a status dict
    is either ``{'status': 'RUNNING'}`` or the dict produced by ``long_task``.

    Args:
        datastore: The application datastore; it is passed to the processor
            and its ``proxy_list`` is read to decide which proxies to test.

    Returns:
        The configured ``Blueprint``.
    """
    check_proxies_blueprint = Blueprint('check_proxies', __name__)

    # {watch uuid: {proxy key: future returned by long_task}}
    checks_in_progress = {}

    @threadpool
    def long_task(uuid, preferred_proxy):
        """Run the text_json_diff processor's fetch for one watch via one proxy.

        Runs on the thread pool (see ``threadpool``), so callers receive a
        future whose result is a dict with the keys ``status``, ``length``,
        ``text`` and ``time``.
        """
        import time
        from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
        from changedetectionio.safe_jinja import render as jinja_render

        status = {'status': '', 'length': 0, 'text': ''}
        # NOTE(review): `contents` is never reassigned in this function, so
        # every reported 'length' is 0. Presumably it was meant to hold the
        # fetched body (size is len of contents, not len transferred) - confirm
        # which fetcher attribute should be used before wiring it up.
        contents = ''
        # Monotonic clock: the elapsed time must not be affected by wall-clock jumps.
        started = time.monotonic()

        try:
            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
            update_handler = processor_module.perform_site_check(datastore=datastore, watch_uuid=uuid)
            update_handler.call_browser(preferred_proxy_id=preferred_proxy)
        except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
            if e.status_code == 404:
                # The proxy itself worked; the page just does not exist.
                status.update({'status': 'OK', 'length': len(contents), 'text': "OK but 404 (page not found)"})
            elif e.status_code in (401, 403):
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"})
            else:
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"})
        except FilterNotFoundInResponse:
            status.update({'status': 'OK', 'length': len(contents), 'text': "OK but CSS/xPath filter not found (page changed layout?)"})
        except content_fetcher_exceptions.EmptyReply as e:
            if e.status_code in (401, 403):
                status.update({'status': 'ERROR OTHER', 'length': len(contents), 'text': f"Got empty reply with code {e.status_code} - Access denied"})
            else:
                status.update({'status': 'ERROR OTHER', 'length': len(contents), 'text': f"Empty reply with code {e.status_code}, needs chrome?"})
        except content_fetcher_exceptions.ReplyWithContentButNoText as e:
            txt = f"Got reply but with no content - Status code {e.status_code} - It's possible that the filters were found, but contained no usable text (or contained only an image)."
            status.update({'status': 'ERROR', 'text': txt})
        except Exception as e:
            # Deliberate catch-all: any other failure is reported to the caller
            # as a status instead of being lost inside the worker thread.
            # The class name and the message are separated so the text is readable.
            status.update({'status': 'ERROR OTHER', 'length': len(contents), 'text': f"Error: {type(e).__name__}: {e}"})
        else:
            status.update({'status': 'OK', 'length': len(contents), 'text': ''})

        if status.get('text'):
            # Render 'text' through the escaping filter so it is safe to display.
            v = {'text': status['text']}
            status['text'] = jinja_render(template_str='{{ text|e}}', **v)

        status['time'] = "{:.2f} s".format(time.monotonic() - started)
        return status

    def _recalc_check_status(uuid):
        """Return ``{proxy key: status dict}`` for every check stored for ``uuid``.

        Each future is given up to 0.05 s to deliver its result. A future that
        has not finished is reported as ``RUNNING``; a future whose task raised
        is reported as ``ERROR OTHER`` so it is not shown as running forever.
        """
        from concurrent.futures import TimeoutError as FutureTimeoutError

        results = {}
        for proxy_key, future in checks_in_progress.get(uuid, {}).items():
            try:
                results[proxy_key] = future.result(timeout=0.05)
            except FutureTimeoutError:
                results[proxy_key] = {'status': 'RUNNING'}
            except Exception as e:
                # Only the exception class name is exposed: it needs no escaping.
                results[proxy_key] = {'status': 'ERROR OTHER', 'length': 0, 'text': 'Error: ' + type(e).__name__}
        return results

    # route() must be the outermost decorator: it registers whatever function
    # it receives, so login_required has to be applied before registration,
    # otherwise the unprotected function is what gets served.
    # NOTE(review): confirm login_required (rather than an "optional login"
    # helper) is the right guard for installs that run without a password.
    @check_proxies_blueprint.route("/<string:uuid>/status", methods=['GET'])
    @login_required
    def get_recheck_status(uuid):
        """Return the current per-proxy state for ``uuid``."""
        return _recalc_check_status(uuid=uuid)

    @check_proxies_blueprint.route("/<string:uuid>/start", methods=['GET'])
    @login_required
    def start_check(uuid):
        """Start a check for every configured proxy and return the current state.

        If checks for ``uuid`` already exist and any is still running, no new
        work is started and the current state is returned. Proxies that already
        have a stored check are not started again.
        """
        if not datastore.proxy_list:
            # A view may not return None; with no proxies there is no state to report.
            return {}

        if checks_in_progress.get(uuid):
            state = _recalc_check_status(uuid=uuid)
            if any(v.get('status') == 'RUNNING' for v in state.values()):
                return state
        else:
            checks_in_progress[uuid] = {}

        tasks = checks_in_progress[uuid]
        for proxy_key, _proxy in datastore.proxy_list.items():
            if not tasks.get(proxy_key):
                tasks[proxy_key] = long_task(uuid=uuid, preferred_proxy=proxy_key)

        return _recalc_check_status(uuid=uuid)

    return check_proxies_blueprint