diff --git a/changedetectionio/blueprint/settings/templates/settings.html b/changedetectionio/blueprint/settings/templates/settings.html
index 150eb35e..5e268fd5 100644
--- a/changedetectionio/blueprint/settings/templates/settings.html
+++ b/changedetectionio/blueprint/settings/templates/settings.html
@@ -264,17 +264,20 @@ nav
+
 
 New: click here (link to changedetection.io tutorial page) find out how to setup and example
 
+
+ {# TODO: render the key fields as password-type inputs so a stored key is not
+    echoed back in the page source on view and is not lost on save #}
+
 {{ render_simple_field(form.application.form.ai.form.LLM_backend) }} Preferred LLM connection
-
-{{ render_checkbox_field(form.application.form.ai.form.openai_key) }}
+{# API keys are StringFields, not booleans - render_simple_field is the correct macro #}
+{{ render_simple_field(form.application.form.ai.form.API_keys.form.openai) }}
 Go here to read more about OpenAI integration
-
-{{ render_checkbox_field(form.application.form.ai.form.gemini_key) }}
+{{ render_simple_field(form.application.form.ai.form.API_keys.form.gemini) }}
 Go here to read more about Google Gemini integration
diff --git a/changedetectionio/blueprint/ui/views.py b/changedetectionio/blueprint/ui/views.py
index d9ae5052..d7cd23e3 100644
--- a/changedetectionio/blueprint/ui/views.py
+++ b/changedetectionio/blueprint/ui/views.py
@@ -212,7 +212,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
         add_paused = request.form.get('edit_and_watch_submit_button') != None
         processor = request.form.get('processor', 'text_json_diff')
 
-        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor})
+        extras = {'paused': add_paused, 'processor': processor}
+
+        # Only attach LLM settings to the watch when a prompt was actually supplied.
+        LLM_prompt = request.form.get('LLM_prompt', '').strip()
+        if LLM_prompt:
+            extras['LLM_prompt'] = LLM_prompt
+            extras['LLM_send_type'] = request.form.get('LLM_send_type', 'text')
+
+        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags', '').strip(), extras=extras)
 
         if new_uuid:
             if add_paused:
diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py
index 4658b2ac..8c0ca435 100644
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -761,6 +761,17 @@ class globalSettingsApplicationUIForm(Form):
     socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()])
     favicons_enabled = BooleanField('Favicons Enabled', default=True, validators=[validators.Optional()])
 
+class globalSettingsApplicationAIKeysForm(Form):
+
+    openai = StringField('OpenAI Key',
+                         validators=[validators.Optional()],
+                         render_kw={"placeholder": 'xxxxxxxxx'}
+                         )
+    gemini = StringField('Google Gemini Key',
+                         validators=[validators.Optional()],
+                         render_kw={"placeholder": 'ooooooooo'}
+                         )
+
 class globalSettingsApplicationAIForm(Form):
 
     #@todo use only configured types?
@@ -768,14 +779,9 @@ class globalSettingsApplicationAIForm(Form):
                            choices=[('openai', 'Open AI'), ('gemini', 'Gemini')],
                            default="text")
 
-    openai_key = StringField('OpenAI Key',
-                             validators=[validators.Optional()],
-                             render_kw={"placeholder": 'xxxxxxxxx'}
-                             )
-    gemini_key = StringField('Google Gemini Key',
-                             validators=[validators.Optional()],
-                             render_kw={"placeholder": 'ooooooooo'}
-                             )
+    # So that we can pass this to our LLM/__init__.py as a keys dict
+    API_keys = FormField(globalSettingsApplicationAIKeysForm)
+
     # datastore.data['settings']['application']..
diff --git a/changedetectionio/processors/LLM/__init__.py b/changedetectionio/processors/LLM/__init__.py
new file mode 100644
index 00000000..9081bcce
--- /dev/null
+++ b/changedetectionio/processors/LLM/__init__.py
@@ -0,0 +1,71 @@
+import importlib
+from langchain_core.messages import SystemMessage, HumanMessage
+
+# Steers the model towards deterministic, stably-ordered answers so repeated
+# runs over similar content do not produce spurious "changes".
+SYSTEM_MESSAGE = (
+    "You are a text analyser who will attempt to give the most concise information "
+    "to the request, the information should be returned in a way that if I ask you again "
+    "I should get the same answer if the outcome is the same. The goal is to cut down "
+    "or reduce the text changes from you when I ask the same question about similar content. "
+    "Always list items in exactly the same order and wording as found in the source text."
+)
+
+
+class LLM_integrate:
+    # provider id -> (module, class); modules are imported lazily so only the
+    # provider actually in use needs its langchain package installed.
+    PROVIDER_MAP = {
+        "openai": ("langchain_openai", "ChatOpenAI"),
+        "azure": ("langchain_community.chat_models", "AzureChatOpenAI"),
+        "gemini": ("langchain_google_genai", "ChatGoogleGenerativeAI")
+    }
+
+    def __init__(self, api_keys: dict):
+        """
+        api_keys = {
+            "openai": "sk-xxx",
+            "azure": "AZURE_KEY",
+            "gemini": "GEMINI_KEY"
+        }
+        """
+        self.api_keys = api_keys
+
+    def run(self, provider: str, model: str, message: str) -> str:
+        """Send `message` to the given provider/model, return the reply text.
+
+        Raises ValueError for an unconfigured provider, and propagates any
+        provider/network errors from the underlying langchain client.
+        """
+        if provider not in self.PROVIDER_MAP:
+            raise ValueError(f"Unknown LLM provider '{provider}', expected one of {list(self.PROVIDER_MAP)}")
+        module_name, class_name = self.PROVIDER_MAP[provider]
+
+        # Import the provider class dynamically
+        module = importlib.import_module(module_name)
+        LLMClass = getattr(module, class_name)
+
+        # Create the LLM object
+        llm_kwargs = {}
+        if provider == "openai":
+            llm_kwargs = dict(api_key=self.api_keys.get("openai", ''),
+                              model=model,
+                              # https://api.python.langchain.com/en/latest/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html#langchain_openai.chat_models.base.ChatOpenAI.temperature
+                              temperature=0  # most deterministic
+                              )
+        elif provider == "azure":
+            llm_kwargs = dict(
+                api_key=self.api_keys.get("azure", ''),
+                azure_endpoint="https://.openai.azure.com",  # FIXME: placeholder - the endpoint must come from configuration
+                deployment_name=model
+            )
+        elif provider == "gemini":
+            llm_kwargs = dict(api_key=self.api_keys.get("gemini", ''), model=model)
+
+        llm = LLMClass(**llm_kwargs)
+
+        # Build the messages
+        messages = [
+            SystemMessage(content=SYSTEM_MESSAGE),
+            HumanMessage(content=message)
+        ]
+
+        # Run the model synchronously (invoke blocks until the reply arrives)
+        result = llm.invoke(messages)
+        return result.content
diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py
index 2ae6df4d..3fbed9c1 100644
--- a/changedetectionio/processors/__init__.py
+++ b/changedetectionio/processors/__init__.py
@@ -1,5 +1,6 @@
 from abc import abstractmethod
 from changedetectionio.content_fetchers.base import Fetcher
+from changedetectionio.processors.LLM import LLM_integrate
 from changedetectionio.strtobool import strtobool
 from copy import deepcopy
 from loguru import logger
diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py
index 760aabae..c59950c2 100644
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@@ -7,7 +7,7 @@ import re
 import urllib3
 
 from changedetectionio.conditions import execute_ruleset_against_all_plugins
-from changedetectionio.processors import difference_detection_processor
+from changedetectionio.processors import difference_detection_processor, LLM_integrate
 from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
 from changedetectionio import html_tools, content_fetchers
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
@@ -293,6 +293,28 @@ class perform_site_check(difference_detection_processor):
             # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
             stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
             stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
+
+        # Run LLM integration over the filtered content when the watch has a
+        # prompt configured and at least one API key is set.
+        ai_settings = self.datastore.data['settings']['application'].get('ai', {})
+        LLM_keys = ai_settings.get('API_keys', {})
+        if watch.get('LLM_prompt') and stripped_text_from_html and LLM_keys:
+            try:
+                integrator = LLM_integrate(api_keys=LLM_keys)
+                response = integrator.run(
+                    # Use the operator-configured backend, not a hard-coded provider.
+                    provider=ai_settings.get('LLM_backend', 'openai'),
+                    model="gpt-4.1",  # @todo make the model configurable per backend
+                    message=f"{watch.get('LLM_prompt')}\n----------- Content follows-----------\n\n{stripped_text_from_html}"
+                )
+            except Exception as e:
+                # @todo handle token/quota exhaustion distinctly?
+                logger.critical(f"Error running LLM integration {str(e)}")
+                raise
+            else:
+                stripped_text_from_html = response
+            finally:
+                logger.debug("LLM request done")
 
 ### CALCULATE MD5
 # If there's text to ignore
diff --git a/requirements.txt b/requirements.txt
index 01e5fecd..bc39b438 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -69,6 +69,12 @@ werkzeug==3.0.6
 # Templating, so far just in the URLs but in the future can be for the notifications also
 jinja2~=3.1
 jinja2-time
+langchain~=0.3
+langchain-openai~=0.3
+# Required by processors/LLM PROVIDER_MAP for the 'gemini' and 'azure' backends
+langchain-google-genai~=2.0
+langchain-community~=0.3
+
 openpyxl
 # https://peps.python.org/pep-0508/#environment-markers
 # https://github.com/dgtlmoon/changedetection.io/pull/1009