WIP

2025-07-24 00:23:05 +02:00 · 2025-07-24 00:23:05 +02:00 · df9258a8f7
commit df9258a8f7
--- a/changedetectionio/blueprint/settings/templates/settings.html
+++ b/changedetectionio/blueprint/settings/templates/settings.html
@ -264,17 +264,20 @@ nav

            </div>
            <div class="tab-pane-inner" id="ai-options">
+                <p><strong>New:</strong> click here (link to changedetection.io tutorial page) to find out how to set up an example</p>
+                <br>
+                Key fields should be password-type fields, so you can see that a key is set, but the key itself is not shown on view and is not lost on save.<br>
+
                <div class="pure-control-group inline-radio">
                    {{ render_simple_field(form.application.form.ai.form.LLM_backend) }}
                    <span class="pure-form-message-inline">Preferred LLM connection</span>
-
                </div>
                <div class="pure-control-group">
-                    {{ render_checkbox_field(form.application.form.ai.form.openai_key) }}
+                    {{ render_checkbox_field(form.application.form.ai.form.API_keys.form.openai) }}
                    <span class="pure-form-message-inline">Go here to read more about OpenAI integration</span>
                </div>
                <div class="pure-control-group">
-                    {{ render_checkbox_field(form.application.form.ai.form.gemini_key) }}
+                    {{ render_checkbox_field(form.application.form.ai.form.API_keys.form.gemini) }}
                    <span class="pure-form-message-inline">Go here to read more about Google Gemini integration</span>
                </div>
            </div>
--- a/changedetectionio/blueprint/ui/views.py
+++ b/changedetectionio/blueprint/ui/views.py
@ -212,7 +212,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe

        add_paused = request.form.get('edit_and_watch_submit_button') != None
        processor = request.form.get('processor', 'text_json_diff')
-        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor})
+        extras = {'paused': add_paused, 'processor': processor}
+
+        LLM_prompt = request.form.get('LLM_prompt', '').strip()
+        if LLM_prompt:
+            extras['LLM_prompt'] = LLM_prompt
+            extras['LLM_send_type'] = request.form.get('LLM_send_type', 'text')
+
+        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras=extras)

        if new_uuid:
            if add_paused:
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@ -761,6 +761,17 @@ class globalSettingsApplicationUIForm(Form):
    socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()])
    favicons_enabled = BooleanField('Favicons Enabled', default=True, validators=[validators.Optional()])

+class globalSettingsApplicationAIKeysForm(Form):
+    """Sub-form with one optional text field per LLM provider API key."""
+
+    # render_kw supplies the HTML placeholder attribute for each input
+    openai = StringField(
+        'OpenAI Key',
+        validators=[validators.Optional()],
+        render_kw={"placeholder": 'xxxxxxxxx'},
+    )
+    gemini = StringField(
+        'Google Gemini Key',
+        validators=[validators.Optional()],
+        render_kw={"placeholder": 'ooooooooo'},
+    )
+
 class globalSettingsApplicationAIForm(Form):

    #@todo use only configured types?
@ -768,14 +779,9 @@ class globalSettingsApplicationAIForm(Form):
                               choices=[('openai', 'Open AI'), ('gemini', 'Gemini')],
                               default="text")

-    openai_key = StringField('OpenAI Key',
-                           validators=[validators.Optional()],
-                           render_kw={"placeholder": 'xxxxxxxxx'}
-                           )
-    gemini_key = StringField('Google Gemini Key',
-                           validators=[validators.Optional()],
-                           render_kw={"placeholder": 'ooooooooo'}
-                           )
+    # So that we can pass this to our LLM/__init__.py as a keys dict
+    API_keys = FormField(globalSettingsApplicationAIKeysForm)
+


 # datastore.data['settings']['application']..
--- a/changedetectionio/processors/LLM/__init__.py
+++ b/changedetectionio/processors/LLM/__init__.py
@ -0,0 +1,64 @@
+import importlib
+from langchain_core.messages import SystemMessage, HumanMessage
+
+# System prompt sent ahead of every request. It asks the model for concise, repeatable
+# answers so that asking the same question about similar content yields the same text.
+SYSTEM_MESSAGE = (
+    "You are a text analyser who will attempt to give the most concise information "
+    "to the request, the information should be returned in a way that if I ask you again "
+    "I should get the same answer if the outcome is the same. The goal is to cut down "
+    "or reduce the text changes from you when I ask the same question about similar content. "
+    "Always list items in exactly the same order and wording as found in the source text."
+)
+
+
+class LLM_integrate:
+    """Send a single prompt to one of several LangChain chat models and return the reply text.
+
+    The LangChain package for the chosen provider is imported lazily inside ``run()``.
+    """
+
+    # provider name -> (module to import, name of the chat-model class inside that module)
+    PROVIDER_MAP = {
+        "openai": ("langchain_openai", "ChatOpenAI"),
+        "azure": ("langchain_community.chat_models", "AzureChatOpenAI"),
+        "gemini": ("langchain_google_genai", "ChatGoogleGenerativeAI")
+    }
+
+    def __init__(self, api_keys: dict, azure_endpoint=None):
+        """
+        :param api_keys: API keys by provider name, for example
+            {
+                "openai": "sk-xxx",
+                "azure": "AZURE_KEY",
+                "gemini": "GEMINI_KEY"
+            }
+            ``None`` is treated as an empty dict.
+        :param azure_endpoint: Endpoint URL handed to the "azure" chat model
+            (e.g. "https://my-resource.openai.azure.com"); not used by the other providers.
+        """
+        self.api_keys = api_keys or {}
+        self.azure_endpoint = azure_endpoint
+
+    def _build_llm_kwargs(self, provider: str, model: str) -> dict:
+        """Return the constructor keyword arguments for the chat-model class of `provider`."""
+        if provider == "openai":
+            return dict(api_key=self.api_keys.get("openai", ''),
+                        model=model,
+                        # https://api.python.langchain.com/en/latest/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html#langchain_openai.chat_models.base.ChatOpenAI.temperature
+                        temperature=0  # most deterministic
+                        )
+
+        if provider == "azure":
+            # A placeholder URL can never work, so fail early with a readable message instead
+            if not self.azure_endpoint:
+                raise ValueError("LLM provider 'azure' requires azure_endpoint to be set")
+            return dict(
+                api_key=self.api_keys.get("azure"),
+                azure_endpoint=self.azure_endpoint,
+                deployment_name=model
+            )
+
+        if provider == "gemini":
+            return dict(api_key=self.api_keys.get("gemini"), model=model)
+
+        # Provider is listed in PROVIDER_MAP but needs no special arguments
+        return {}
+
+    def run(self, provider: str, model: str, message: str):
+        """Send `message` (preceded by SYSTEM_MESSAGE) to the given provider/model.
+
+        :param provider: One of the keys of PROVIDER_MAP.
+        :param model: Model name (used as the deployment name for "azure").
+        :param message: The human message content.
+        :return: The ``content`` attribute of the model's response.
+        :raises ValueError: If `provider` is unknown, or "azure" is used without an endpoint.
+        """
+        if provider not in self.PROVIDER_MAP:
+            supported = ", ".join(sorted(self.PROVIDER_MAP))
+            raise ValueError(f"Unknown LLM provider '{provider}', expected one of: {supported}")
+
+        module_name, class_name = self.PROVIDER_MAP[provider]
+
+        # Build (and validate) the arguments first so configuration errors show up before any import error
+        llm_kwargs = self._build_llm_kwargs(provider, model)
+
+        # Import the class dynamically
+        module = importlib.import_module(module_name)
+        LLMClass = getattr(module, class_name)
+
+        # Create the LLM object
+        llm = LLMClass(**llm_kwargs)
+
+        # Build your messages
+        messages = [
+            SystemMessage(content=SYSTEM_MESSAGE),
+            HumanMessage(content=message)
+        ]
+
+        # invoke() is the synchronous call, this blocks until the provider has answered
+        result = llm.invoke(messages)
+        return result.content
--- a/changedetectionio/processors/__init__.py
+++ b/changedetectionio/processors/__init__.py
@ -1,5 +1,6 @@
 from abc import abstractmethod
 from changedetectionio.content_fetchers.base import Fetcher
+from changedetectionio.processors.LLM import LLM_integrate
 from changedetectionio.strtobool import strtobool
 from copy import deepcopy
 from loguru import logger
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@ -7,7 +7,7 @@ import re
 import urllib3

 from changedetectionio.conditions import execute_ruleset_against_all_plugins
-from changedetectionio.processors import difference_detection_processor
+from changedetectionio.processors import difference_detection_processor, LLM_integrate
 from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
 from changedetectionio import html_tools, content_fetchers
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
@ -293,6 +293,30 @@ class perform_site_check(difference_detection_processor):
            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
+### OPENAI?
+
+
+        # And here we run LLM integration based on the content we received.
+        # .get() all the way down so that settings without an 'ai' section don't raise for every watch.
+        LLM_settings = self.datastore.data['settings']['application'].get('ai') or {}
+        LLM_keys = LLM_settings.get('API_keys') or {}
+        # @todo provider and model are hard-coded for now
+        LLM_provider = "openai"
+        LLM_model = "gpt-4.1"  # gpt-4-turbo
+
+        # A keys dict whose values are all empty strings is still truthy, so check the key that will actually be used
+        if watch.get('LLM_prompt') and stripped_text_from_html and LLM_keys.get(LLM_provider):
+            try:
+                integrator = LLM_integrate(api_keys=LLM_keys)
+                response = integrator.run(
+                    provider=LLM_provider,
+                    model=LLM_model,
+                    message=f"{watch.get('LLM_prompt')}\n----------- Content follows-----------\n\n{stripped_text_from_html}"
+                )
+            except Exception as e:
+                # todo is there something special when tokens are used up etc?
+                logger.critical(f"Error running LLM integration {str(e)} (type etc)")
+                # Re-raise unchanged so the caller sees the original exception and traceback
+                raise
+            else:
+                # Only replace the text to be compared when the LLM call succeeded
+                stripped_text_from_html = response
+            finally:
+                logger.debug("LLM request done (type etc)")

 ### CALCULATE MD5
        # If there's text to ignore
--- a/changedetectionio/static/js/watch-settings.js
+++ b/changedetectionio/static/js/watch-settings.js
@ -21,7 +21,7 @@ function request_textpreview_update() {
        namespace: 'watchEdit'
    }).done(function (data) {
        console.debug(data['duration'])
-        $('#error-text').text('');
+        $('#error-text').text(data['duration']);
        $('#filters-and-triggers #text-preview-before-inner').text(data['before_filter']);
        $('#filters-and-triggers #text-preview-inner')
            .text(data['after_filter'])
--- a/requirements.txt
+++ b/requirements.txt
@ -69,6 +69,9 @@ werkzeug==3.0.6
 # Templating, so far just in the URLs but in the future can be for the notifications also
 jinja2~=3.1
 jinja2-time
+# LLM integration (changedetectionio/processors/LLM)
+# NOTE(review): the LLM provider map also imports langchain_community (azure) and
+# langchain_google_genai (gemini), neither of which is listed here - confirm whether they should be added
+langchain~=0.3
+langchain-openai~=0.3
+
 openpyxl
 # https://peps.python.org/pep-0508/#environment-markers
 # https://github.com/dgtlmoon/changedetection.io/pull/1009