kopia lustrzana https://github.com/dgtlmoon/changedetection.io
BrowserSteps - Refactored to re-use playwright context which should solve some errors
rodzic
5f338d7824
commit
e4f6d54ae2
|
@ -27,58 +27,103 @@ import os
|
|||
import logging
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio import login_optionally_required
|
||||
browsersteps_live_ui_o = {}
|
||||
browsersteps_playwright_browser_interface = None
|
||||
browsersteps_playwright_browser_interface_browser = None
|
||||
browsersteps_playwright_browser_interface_context = None
|
||||
browsersteps_playwright_browser_interface_end_time = None
|
||||
browsersteps_playwright_browser_interface_start_time = None
|
||||
|
||||
def cleanup_playwright_session():
|
||||
browsersteps_sessions = {}
|
||||
io_interface_context = None
|
||||
|
||||
global browsersteps_live_ui_o
|
||||
global browsersteps_playwright_browser_interface
|
||||
global browsersteps_playwright_browser_interface_browser
|
||||
global browsersteps_playwright_browser_interface_context
|
||||
global browsersteps_playwright_browser_interface_end_time
|
||||
global browsersteps_playwright_browser_interface_start_time
|
||||
|
||||
browsersteps_live_ui_o = {}
|
||||
browsersteps_playwright_browser_interface = None
|
||||
browsersteps_playwright_browser_interface_browser = None
|
||||
browsersteps_playwright_browser_interface_end_time = None
|
||||
browsersteps_playwright_browser_interface_start_time = None
|
||||
|
||||
print("Cleaning up old playwright session because time was up, calling .goodbye()")
|
||||
try:
|
||||
browsersteps_playwright_browser_interface_context.goodbye()
|
||||
except Exception as e:
|
||||
print ("Got exception in shutdown, probably OK")
|
||||
print (str(e))
|
||||
|
||||
browsersteps_playwright_browser_interface_context = None
|
||||
|
||||
print ("Cleaning up old playwright session because time was up - done")
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
||||
|
||||
def start_browsersteps_session(watch_uuid):
|
||||
from . import nonContext
|
||||
from . import browser_steps
|
||||
import time
|
||||
global browsersteps_sessions
|
||||
global io_interface_context
|
||||
|
||||
|
||||
# We keep the playwright session open for many minutes
|
||||
seconds_keepalive = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
||||
|
||||
browsersteps_start_session = {'start_time': time.time()}
|
||||
|
||||
# You can only have one of these running
|
||||
# This should be very fine to leave running for the life of the application
|
||||
# @idea - Make it global so the pool of watch fetchers can use it also
|
||||
if not io_interface_context:
|
||||
io_interface_context = nonContext.c_sync_playwright()
|
||||
# Start the Playwright context, which is actually a nodejs sub-process and communicates over STDIN/STDOUT pipes
|
||||
io_interface_context = io_interface_context.start()
|
||||
|
||||
|
||||
# keep it alive for 10 seconds more than we advertise, sometimes it helps to keep it shutting down cleanly
|
||||
keepalive = "&timeout={}".format(((seconds_keepalive + 3) * 1000))
|
||||
try:
|
||||
browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(
|
||||
os.getenv('PLAYWRIGHT_DRIVER_URL', '') + keepalive)
|
||||
except Exception as e:
|
||||
if 'ECONNREFUSED' in str(e):
|
||||
return make_response('Unable to start the Playwright Browser session, is it running?', 401)
|
||||
else:
|
||||
return make_response(str(e), 401)
|
||||
|
||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||
proxy = None
|
||||
if proxy_id:
|
||||
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
|
||||
if proxy_url:
|
||||
|
||||
# Playwright needs separate username and password values
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_url)
|
||||
proxy = {'server': proxy_url}
|
||||
|
||||
if parsed.username:
|
||||
proxy['username'] = parsed.username
|
||||
|
||||
if parsed.password:
|
||||
proxy['password'] = parsed.password
|
||||
|
||||
print("Browser Steps: UUID {} selected proxy {}".format(watch_uuid, proxy_url))
|
||||
|
||||
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
|
||||
browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
|
||||
playwright_browser=browsersteps_start_session['browser'],
|
||||
proxy=proxy)
|
||||
|
||||
# For test
|
||||
#browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
||||
|
||||
return browsersteps_start_session
|
||||
|
||||
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['GET', 'POST'])
|
||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||
def browsersteps_start_session():
|
||||
# A new session was requested, return sessionID
|
||||
|
||||
import uuid
|
||||
browsersteps_session_id = str(uuid.uuid4())
|
||||
|
||||
watch_uuid = request.args.get('uuid')
|
||||
global browsersteps_sessions
|
||||
|
||||
print("Starting connection with playwright")
|
||||
logging.debug("browser_steps.py connecting")
|
||||
browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
|
||||
print("Starting connection with playwright - done")
|
||||
return {'browsersteps_session_id': browsersteps_session_id}
|
||||
|
||||
# A request for an action was received
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
||||
def browsersteps_ui_update():
|
||||
import base64
|
||||
import playwright._impl._api_types
|
||||
import time
|
||||
|
||||
global browsersteps_sessions
|
||||
from changedetectionio.blueprint.browser_steps import browser_steps
|
||||
|
||||
global browsersteps_live_ui_o, browsersteps_playwright_browser_interface_end_time
|
||||
global browsersteps_playwright_browser_interface_browser
|
||||
global browsersteps_playwright_browser_interface
|
||||
global browsersteps_playwright_browser_interface_start_time
|
||||
|
||||
step_n = None
|
||||
remaining =0
|
||||
uuid = request.args.get('uuid')
|
||||
|
||||
|
@ -87,13 +132,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||
if not browsersteps_session_id:
|
||||
return make_response('No browsersteps_session_id specified', 500)
|
||||
|
||||
# Because we don't "really" run in a context manager ( we make the playwright interface global/long-living )
|
||||
# We need to manage the shutdown when the time is up
|
||||
if browsersteps_playwright_browser_interface_end_time:
|
||||
remaining = browsersteps_playwright_browser_interface_end_time-time.time()
|
||||
if browsersteps_playwright_browser_interface_end_time and remaining <= 0:
|
||||
cleanup_playwright_session()
|
||||
return make_response('Browser session expired, please reload the Browser Steps interface', 401)
|
||||
if not browsersteps_sessions.get(browsersteps_session_id):
|
||||
return make_response('No session exists under that ID', 500)
|
||||
|
||||
|
||||
# Actions - step/apply/etc, do the thing and return state
|
||||
if request.method == 'POST':
|
||||
|
@ -112,12 +153,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||
# @todo try.. accept.. nice errors not popups..
|
||||
try:
|
||||
|
||||
this_session = browsersteps_live_ui_o.get(browsersteps_session_id)
|
||||
if not this_session:
|
||||
print("Browser exited")
|
||||
return make_response('Browser session ran out of time :( Please reload this page.', 401)
|
||||
|
||||
this_session.call_action(action_name=step_operation,
|
||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation,
|
||||
selector=step_selector,
|
||||
optional_value=step_optional_value)
|
||||
|
||||
|
@ -129,108 +165,43 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||
# Get visual selector ready/update its data (also use the current filter info from the page?)
|
||||
# When the last 'apply' button was pressed
|
||||
# @todo this adds overhead because the xpath selection is happening twice
|
||||
u = this_session.page.url
|
||||
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
||||
if is_last_step and u:
|
||||
(screenshot, xpath_data) = this_session.request_visualselector_data()
|
||||
(screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data()
|
||||
datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
|
||||
datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
|
||||
|
||||
# Setup interface
|
||||
if request.method == 'GET':
|
||||
# if not this_session.page:
|
||||
# cleanup_playwright_session()
|
||||
# return make_response('Browser session ran out of time :( Please reload this page.', 401)
|
||||
|
||||
if not browsersteps_playwright_browser_interface:
|
||||
print("Starting connection with playwright")
|
||||
logging.debug("browser_steps.py connecting")
|
||||
# Screenshots and other info only needed on requesting a step (POST)
|
||||
try:
|
||||
state = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
||||
except playwright._impl._api_types.Error as e:
|
||||
return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401)
|
||||
|
||||
global browsersteps_playwright_browser_interface_context
|
||||
from . import nonContext
|
||||
browsersteps_playwright_browser_interface_context = nonContext.c_sync_playwright()
|
||||
browsersteps_playwright_browser_interface = browsersteps_playwright_browser_interface_context.start()
|
||||
# At 20 minutes, some other variable is closing it
|
||||
# @todo find out what it is and set it
|
||||
seconds_keepalive = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
||||
# Use send_file() which is way faster than read/write loop on bytes
|
||||
import json
|
||||
from tempfile import mkstemp
|
||||
from flask import send_file
|
||||
tmp_fd, tmp_file = mkstemp(text=True, suffix=".json", prefix="changedetectionio-")
|
||||
|
||||
# keep it alive for 10 seconds more than we advertise, sometimes it helps to keep it shutting down cleanly
|
||||
keepalive = "&timeout={}".format(((seconds_keepalive+3) * 1000))
|
||||
try:
|
||||
browsersteps_playwright_browser_interface_browser = browsersteps_playwright_browser_interface.chromium.connect_over_cdp(
|
||||
os.getenv('PLAYWRIGHT_DRIVER_URL', '') + keepalive)
|
||||
except Exception as e:
|
||||
if 'ECONNREFUSED' in str(e):
|
||||
return make_response('Unable to start the Playwright session properly, is it running?', 401)
|
||||
output = json.dumps({'screenshot': "data:image/jpeg;base64,{}".format(
|
||||
base64.b64encode(state[0]).decode('ascii')),
|
||||
'xpath_data': state[1],
|
||||
'session_age_start': browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start,
|
||||
'browser_time_remaining': round(remaining)
|
||||
})
|
||||
|
||||
browsersteps_playwright_browser_interface_end_time = time.time() + (seconds_keepalive-3)
|
||||
print("Starting connection with playwright - done")
|
||||
with os.fdopen(tmp_fd, 'w') as f:
|
||||
f.write(output)
|
||||
|
||||
if not browsersteps_live_ui_o.get(browsersteps_session_id):
|
||||
# Boot up a new session
|
||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||
proxy = None
|
||||
if proxy_id:
|
||||
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
|
||||
if proxy_url:
|
||||
|
||||
# Playwright needs separate username and password values
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_url)
|
||||
proxy = {'server': proxy_url}
|
||||
|
||||
if parsed.username:
|
||||
proxy['username'] = parsed.username
|
||||
|
||||
if parsed.password:
|
||||
proxy['password'] = parsed.password
|
||||
|
||||
print("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
|
||||
|
||||
# Begin the new "Playwright Context" that re-uses the playwright interface
|
||||
# Each session is a "Playwright Context" as a list, that uses the playwright interface
|
||||
browsersteps_live_ui_o[browsersteps_session_id] = browser_steps.browsersteps_live_ui(
|
||||
playwright_browser=browsersteps_playwright_browser_interface_browser,
|
||||
proxy=proxy)
|
||||
this_session = browsersteps_live_ui_o[browsersteps_session_id]
|
||||
|
||||
if not this_session.page:
|
||||
cleanup_playwright_session()
|
||||
return make_response('Browser session ran out of time :( Please reload this page.', 401)
|
||||
|
||||
response = None
|
||||
|
||||
if request.method == 'POST':
|
||||
# Screenshots and other info only needed on requesting a step (POST)
|
||||
try:
|
||||
state = this_session.get_current_state()
|
||||
except playwright._impl._api_types.Error as e:
|
||||
return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401)
|
||||
|
||||
# Use send_file() which is way faster than read/write loop on bytes
|
||||
import json
|
||||
from tempfile import mkstemp
|
||||
from flask import send_file
|
||||
tmp_fd, tmp_file = mkstemp(text=True, suffix=".json", prefix="changedetectionio-")
|
||||
|
||||
output = json.dumps({'screenshot': "data:image/jpeg;base64,{}".format(
|
||||
base64.b64encode(state[0]).decode('ascii')),
|
||||
'xpath_data': state[1],
|
||||
'session_age_start': this_session.age_start,
|
||||
'browser_time_remaining': round(remaining)
|
||||
})
|
||||
|
||||
with os.fdopen(tmp_fd, 'w') as f:
|
||||
f.write(output)
|
||||
|
||||
response = make_response(send_file(path_or_file=tmp_file,
|
||||
mimetype='application/json; charset=UTF-8',
|
||||
etag=True))
|
||||
# No longer needed
|
||||
os.unlink(tmp_file)
|
||||
|
||||
elif request.method == 'GET':
|
||||
# Just enough to get the session rolling, it will call for goto-site via POST next
|
||||
response = make_response({
|
||||
'session_age_start': this_session.age_start,
|
||||
'browser_time_remaining': round(remaining)
|
||||
})
|
||||
response = make_response(send_file(path_or_file=tmp_file,
|
||||
mimetype='application/json; charset=UTF-8',
|
||||
etag=True))
|
||||
# No longer needed
|
||||
os.unlink(tmp_file)
|
||||
|
||||
return response
|
||||
|
||||
|
|
|
@ -71,10 +71,10 @@ class steppable_browser_interface():
|
|||
optional_value = str(jinja2_env.from_string(optional_value).render())
|
||||
|
||||
action_handler(selector, optional_value)
|
||||
self.page.wait_for_timeout(3 * 1000)
|
||||
self.page.wait_for_timeout(1.5 * 1000)
|
||||
print("Call action done in", time.time() - now)
|
||||
|
||||
def action_goto_url(self, selector, value):
|
||||
def action_goto_url(self, selector=None, value=None):
|
||||
# self.page.set_viewport_size({"width": 1280, "height": 5000})
|
||||
now = time.time()
|
||||
response = self.page.goto(value, timeout=0, wait_until='commit')
|
||||
|
@ -105,7 +105,8 @@ class steppable_browser_interface():
|
|||
print("Clicking element")
|
||||
if not len(selector.strip()):
|
||||
return
|
||||
self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500))
|
||||
|
||||
self.page.click(selector=selector, timeout=30 * 1000, delay=randint(200, 500))
|
||||
|
||||
def action_click_element_if_exists(self, selector, value):
|
||||
import playwright._impl._api_types as _api_types
|
||||
|
@ -137,13 +138,13 @@ class steppable_browser_interface():
|
|||
def action_wait_for_text(self, selector, value):
|
||||
import json
|
||||
v = json.dumps(value)
|
||||
self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000)
|
||||
self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=90000)
|
||||
|
||||
def action_wait_for_text_in_element(self, selector, value):
|
||||
import json
|
||||
s = json.dumps(selector)
|
||||
v = json.dumps(value)
|
||||
self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000)
|
||||
self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=90000)
|
||||
|
||||
# @todo - in the future make some popout interface to capture what needs to be set
|
||||
# https://playwright.dev/python/docs/api/class-keyboard
|
||||
|
|
|
@ -182,7 +182,8 @@ class Fetcher():
|
|||
optional_value=optional_value)
|
||||
self.screenshot_step(step_n)
|
||||
self.save_step_html(step_n)
|
||||
except TimeoutError:
|
||||
except TimeoutError as e:
|
||||
print(str(e))
|
||||
# Stop processing here
|
||||
raise BrowserStepsStepTimout(step_n=step_n)
|
||||
|
||||
|
|
|
@ -238,7 +238,7 @@ $(document).ready(function () {
|
|||
|
||||
function start() {
|
||||
console.log("Starting browser-steps UI");
|
||||
browsersteps_session_id = Date.now();
|
||||
browsersteps_session_id = false;
|
||||
// @todo This setting of the first one should be done at the datalayer but wtforms doesnt wanna play nice
|
||||
$('#browser_steps >li:first-child').removeClass('empty');
|
||||
set_first_gotosite_disabled();
|
||||
|
@ -246,7 +246,7 @@ $(document).ready(function () {
|
|||
$('.clear,.remove', $('#browser_steps >li:first-child')).hide();
|
||||
$.ajax({
|
||||
type: "GET",
|
||||
url: browser_steps_sync_url + "&browsersteps_session_id=" + browsersteps_session_id,
|
||||
url: browser_steps_start_url,
|
||||
statusCode: {
|
||||
400: function () {
|
||||
// More than likely the CSRF token was lost when the server restarted
|
||||
|
@ -254,12 +254,12 @@ $(document).ready(function () {
|
|||
}
|
||||
}
|
||||
}).done(function (data) {
|
||||
xpath_data = data.xpath_data;
|
||||
$("#loading-status-text").fadeIn();
|
||||
browsersteps_session_id = data.browsersteps_session_id;
|
||||
// This should trigger 'Goto site'
|
||||
console.log("Got startup response, requesting Goto-Site (first) step fake click");
|
||||
$('#browser_steps >li:first-child .apply').click();
|
||||
browserless_seconds_remaining = data.browser_time_remaining;
|
||||
browserless_seconds_remaining = 500;
|
||||
set_first_gotosite_disabled();
|
||||
}).fail(function (data) {
|
||||
console.log(data);
|
||||
|
|
|
@ -14,7 +14,9 @@
|
|||
{% endif %}
|
||||
|
||||
const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
|
||||
const browser_steps_start_url="{{url_for('browser_steps.browsersteps_start_session', uuid=uuid)}}";
|
||||
const browser_steps_sync_url="{{url_for('browser_steps.browsersteps_ui_update', uuid=uuid)}}";
|
||||
|
||||
</script>
|
||||
|
||||
<script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
|
||||
|
|
Ładowanie…
Reference in New Issue