kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Add custom out-of-stock and in-stock string detection
- Implements configurable custom strings for restock detection (fixes #2779)
- Adds robust text normalization (case-insensitive, accent removal, whitespace)
- Supports international sites with custom messages like 'Pronto estarán en stock!'
- Makes built-in in-stock detection configurable (addresses TODO)
- Includes comprehensive unit and integration tests

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>

pull/3334/head
rodzic
011fa3540e
commit
1a2e9309ed
|
|
@ -1,8 +1,8 @@
|
|||
async () => {
|
||||
async (customOutOfStockStrings = []) => {
|
||||
|
||||
function isItemInStock() {
|
||||
// @todo Pass these in so the same list can be used in non-JS fetchers
|
||||
const outOfStockTexts = [
|
||||
const builtInOutOfStockTexts = [
|
||||
' أخبرني عندما يتوفر',
|
||||
'0 in stock',
|
||||
'actuellement indisponible',
|
||||
|
|
@ -110,6 +110,9 @@ async () => {
|
|||
'품절'
|
||||
];
|
||||
|
||||
// Combine built-in strings with custom strings provided by user
|
||||
const outOfStockTexts = [...builtInOutOfStockTexts, ...customOutOfStockStrings];
|
||||
|
||||
|
||||
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
from wtforms import (
|
||||
BooleanField,
|
||||
validators,
|
||||
FloatField
|
||||
FloatField,
|
||||
TextAreaField
|
||||
)
|
||||
from wtforms.fields.choices import RadioField
|
||||
from wtforms.fields.form import FormField
|
||||
|
|
@ -28,6 +29,16 @@ class RestockSettingsForm(Form):
|
|||
], render_kw={"placeholder": "0%", "size": "5"})
|
||||
|
||||
follow_price_changes = BooleanField('Follow price changes', default=True)
|
||||
|
||||
custom_outofstock_strings = TextAreaField('Custom out-of-stock detection strings',
|
||||
[validators.Optional()],
|
||||
render_kw={"placeholder": "Enter custom out-of-stock strings, one per line\nExample:\nPronto estarán en stock!\nTemporarily out of stock",
|
||||
"rows": "3"})
|
||||
|
||||
custom_instock_strings = TextAreaField('Custom in-stock detection strings',
|
||||
[validators.Optional()],
|
||||
render_kw={"placeholder": "Enter custom in-stock strings, one per line\nExample:\nDisponible ahora\nIn voorraad",
|
||||
"rows": "3"})
|
||||
|
||||
class processor_settings_form(processor_text_json_diff_form):
|
||||
restock_settings = FormField(RestockSettingsForm)
|
||||
|
|
@ -74,6 +85,14 @@ class processor_settings_form(processor_text_json_diff_form):
|
|||
{{ render_field(form.restock_settings.price_change_threshold_percent) }}
|
||||
<span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
|
||||
<span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_field(form.restock_settings.custom_outofstock_strings) }}
|
||||
<span class="pure-form-message-inline">Additional custom out-of-stock detection strings (one per line).</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_field(form.restock_settings.custom_instock_strings) }}
|
||||
<span class="pure-form-message-inline">Additional custom in-stock detection strings (one per line).</span>
|
||||
</fieldset>
|
||||
</div>
|
||||
</fieldset>
|
||||
|
|
|
|||
|
|
@ -143,6 +143,89 @@ def is_between(number, lower=None, upper=None):
|
|||
class perform_site_check(difference_detection_processor):
|
||||
screenshot = None
|
||||
xpath_data = None
|
||||
|
||||
def _normalize_text_for_matching(self, text):
|
||||
"""
|
||||
Normalize text for more robust matching:
|
||||
- Convert to lowercase
|
||||
- Remove accents/diacritics
|
||||
- Normalize whitespace
|
||||
"""
|
||||
import unicodedata
|
||||
import re
|
||||
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
# Convert to lowercase
|
||||
text = text.lower()
|
||||
|
||||
# Remove accents/diacritics (NFD normalization + filter)
|
||||
# This converts "é" to "e", "ñ" to "n", etc.
|
||||
text = unicodedata.normalize('NFD', text)
|
||||
text = ''.join(char for char in text if unicodedata.category(char) != 'Mn')
|
||||
|
||||
# Normalize whitespace (replace multiple spaces/tabs/newlines with single space)
|
||||
text = re.sub(r'\s+', ' ', text).strip()
|
||||
|
||||
return text
|
||||
|
||||
def _check_custom_strings(self, text_to_check, custom_strings, string_type="out-of-stock"):
|
||||
"""
|
||||
Check text against custom strings (either in-stock or out-of-stock).
|
||||
Uses normalized matching for better international support.
|
||||
Returns the matched string if found, None otherwise.
|
||||
"""
|
||||
if not custom_strings:
|
||||
return None
|
||||
|
||||
# Split custom strings by newlines and clean them up
|
||||
raw_custom_list = [s.strip() for s in custom_strings.split('\n') if s.strip()]
|
||||
|
||||
if not raw_custom_list:
|
||||
return None
|
||||
|
||||
# Normalize both the page text and custom strings for matching
|
||||
normalized_text = self._normalize_text_for_matching(text_to_check)
|
||||
|
||||
# Check each custom string against the text
|
||||
for original_custom_text in raw_custom_list:
|
||||
normalized_custom_text = self._normalize_text_for_matching(original_custom_text)
|
||||
|
||||
if normalized_custom_text and normalized_custom_text in normalized_text:
|
||||
logger.debug(f"Custom {string_type} string found: '{original_custom_text}' (normalized: '{normalized_custom_text}')")
|
||||
return original_custom_text # Return the original user-provided string
|
||||
|
||||
return None
|
||||
|
||||
def _get_combined_instock_strings(self, restock_settings):
|
||||
"""
|
||||
Get combined list of built-in and custom in-stock strings.
|
||||
Custom strings are normalized for better matching.
|
||||
"""
|
||||
# Built-in in-stock strings (from the TODO line)
|
||||
builtin_instock_strings = [
|
||||
'instock',
|
||||
'instoreonly',
|
||||
'limitedavailability',
|
||||
'onlineonly',
|
||||
'presale'
|
||||
]
|
||||
|
||||
# Add custom in-stock strings if provided
|
||||
custom_strings = restock_settings.get('custom_instock_strings', '').strip()
|
||||
if custom_strings:
|
||||
# Normalize custom strings for better matching
|
||||
custom_list = []
|
||||
for s in custom_strings.split('\n'):
|
||||
s = s.strip()
|
||||
if s:
|
||||
normalized = self._normalize_text_for_matching(s)
|
||||
if normalized:
|
||||
custom_list.append(normalized)
|
||||
builtin_instock_strings.extend(custom_list)
|
||||
|
||||
return builtin_instock_strings
|
||||
|
||||
def run_changedetection(self, watch):
|
||||
import hashlib
|
||||
|
|
@ -204,17 +287,13 @@ class perform_site_check(difference_detection_processor):
|
|||
update_obj['restock'] = itemprop_availability
|
||||
|
||||
if itemprop_availability.get('availability'):
|
||||
# @todo: Configurable?
|
||||
if any(substring.lower() in itemprop_availability['availability'].lower() for substring in [
|
||||
'instock',
|
||||
'instoreonly',
|
||||
'limitedavailability',
|
||||
'onlineonly',
|
||||
'presale']
|
||||
):
|
||||
update_obj['restock']['in_stock'] = True
|
||||
else:
|
||||
update_obj['restock']['in_stock'] = False
|
||||
# Now configurable! Check both built-in and custom in-stock strings
|
||||
combined_instock_strings = self._get_combined_instock_strings(restock_settings)
|
||||
normalized_availability = self._normalize_text_for_matching(itemprop_availability['availability'])
|
||||
|
||||
# Check if any of the in-stock strings match
|
||||
found_match = any(substring in normalized_availability for substring in combined_instock_strings)
|
||||
update_obj['restock']['in_stock'] = found_match
|
||||
|
||||
# Main detection method
|
||||
fetched_md5 = None
|
||||
|
|
@ -225,21 +304,45 @@ class perform_site_check(difference_detection_processor):
|
|||
update_obj['restock']["original_price"] = itemprop_availability.get('price')
|
||||
|
||||
if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'):
|
||||
raise ProcessorException(
|
||||
message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
|
||||
url=watch.get('url'),
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
# Before giving up, check if we have custom out-of-stock strings that might match
|
||||
custom_strings = restock_settings.get('custom_outofstock_strings', '').strip()
|
||||
if custom_strings:
|
||||
custom_stock_result = self._check_custom_strings(text, custom_strings, "out-of-stock")
|
||||
if custom_stock_result:
|
||||
# Found a match with custom strings
|
||||
update_obj['restock']['in_stock'] = False
|
||||
logger.debug(f"Watch UUID {watch.get('uuid')} custom out-of-stock detection found (no JS): '{custom_stock_result}'")
|
||||
else:
|
||||
# No custom string match, assume in stock
|
||||
update_obj['restock']['in_stock'] = True
|
||||
logger.debug(f"Watch UUID {watch.get('uuid')} no custom out-of-stock strings matched, assuming in stock")
|
||||
else:
|
||||
raise ProcessorException(
|
||||
message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
|
||||
url=watch.get('url'),
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
logger.debug(f"self.fetcher.instock_data is - '{self.fetcher.instock_data}' and itemprop_availability.get('availability') is {itemprop_availability.get('availability')}")
|
||||
# Nothing automatic in microdata found, revert to scraping the page
|
||||
if self.fetcher.instock_data and itemprop_availability.get('availability') is None:
|
||||
# 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
|
||||
# Careful! this does not really come from chrome/js when the watch is set to plaintext
|
||||
update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
|
||||
logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned instock_data - '{self.fetcher.instock_data}' from JS scraper.")
|
||||
stock_detection_result = self.fetcher.instock_data
|
||||
|
||||
# Check if we have custom out-of-stock strings and JS returned "Possibly in stock"
|
||||
custom_strings = restock_settings.get('custom_outofstock_strings', '').strip()
|
||||
if stock_detection_result == 'Possibly in stock' and custom_strings:
|
||||
# Re-check using custom strings against the page text
|
||||
custom_stock_result = self._check_custom_strings(text, custom_strings, "out-of-stock")
|
||||
if custom_stock_result:
|
||||
stock_detection_result = custom_stock_result
|
||||
logger.debug(f"Watch UUID {watch.get('uuid')} custom out-of-stock detection found: '{custom_stock_result}'")
|
||||
|
||||
update_obj['restock']["in_stock"] = True if stock_detection_result == 'Possibly in stock' else False
|
||||
logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned instock_data - '{stock_detection_result}' from JS scraper.")
|
||||
|
||||
# Very often websites will lie about the 'availability' in the metadata, so if the scraped version says its NOT in stock, use that.
|
||||
if self.fetcher.instock_data and self.fetcher.instock_data != 'Possibly in stock':
|
||||
|
|
|
|||
|
|
@ -111,3 +111,130 @@ def test_restock_detection(client, live_server, measure_memory_usage):
|
|||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'not-in-stock' in res.data, "Correctly showing NOT IN STOCK in the list after it changed from IN STOCK"
|
||||
|
||||
|
||||
def test_restock_custom_strings(client, live_server):
    """
    Integration test for the custom restock detection strings.

    First serves a page containing a Spanish "coming soon" message, configures
    that message as a custom out-of-stock string and expects the watch list to
    show the watch as not in stock. Then swaps the page content, adds custom
    in-stock strings and expects the not-in-stock marker to be gone.
    """

    # Set up a response with custom out-of-stock text
    test_return_data = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div>price: $10.99</div>
<div id="custom">Pronto estarán en stock!</div>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data)

    test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet')

    # Add watch with custom out-of-stock strings
    res = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": '', 'processor': 'restock_diff'},
        follow_redirects=True
    )

    # Get the UUID so we can configure the watch
    uuid = extract_UUID_from_client(client)

    # Configure custom out-of-stock strings (two candidates, one per line;
    # only the first one appears in the served page)
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
        data={
            "url": test_url,
            'processor': 'restock_diff',
            'restock_settings-custom_outofstock_strings': 'Pronto estarán en stock!\nCustom unavailable message'
        },
        follow_redirects=True
    )
    assert b"Updated watch." in res.data

    # Check that it detects as out of stock
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))
    assert b'not-in-stock' in res.data, "Should detect custom out-of-stock string"

    # Test custom in-stock strings by changing the content
    # (same page, but the status element now holds an "available now" message)
    test_return_data_instock = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div>price: $10.99</div>
<div id="custom">Disponible ahora</div>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data_instock)

    # Update the watch to include custom in-stock strings
    # (the out-of-stock strings are re-sent unchanged alongside them)
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
        data={
            "url": test_url,
            'processor': 'restock_diff',
            'restock_settings-custom_outofstock_strings': 'Pronto estarán en stock!\nCustom unavailable message',
            'restock_settings-custom_instock_strings': 'Disponible ahora\nIn voorraad'
        },
        follow_redirects=True
    )
    assert b"Updated watch." in res.data

    # Check again - should be detected as in stock now
    # NOTE(review): this page no longer contains either custom out-of-stock
    # string, so the assertion below would presumably also hold without any
    # custom in-stock strings configured - confirm that the in-stock strings
    # are actually what is being exercised here.
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))
    assert b'not-in-stock' not in res.data, "Should detect custom in-stock string and show as available"
|
||||
|
||||
|
||||
def test_restock_custom_strings_normalization(client, live_server):
    """
    Test key normalization scenarios: accents, case, and spaces.

    The served page shows an upper-case status wrapped in Spanish exclamation
    marks, while the configured custom out-of-stock string is the plain
    lower-case phrase; the watch is still expected to show as not in stock.
    """

    # Test page with Spanish text with accents and mixed case
    test_return_data = """<html>
<body>
<div>price: $10.99</div>
<div id="status">¡TEMPORALMENTE AGOTADO!</div>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data)

    test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet')

    # Add watch
    res = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": '', 'processor': 'restock_diff'},
        follow_redirects=True
    )

    uuid = extract_UUID_from_client(client)

    # Configure custom string without accents, lowercase, no extra spaces
    # NOTE(review): unlike test_restock_custom_strings, the response of this
    # edit request is not asserted on - consider checking for "Updated watch."
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
        data={
            "url": test_url,
            'processor': 'restock_diff',
            'restock_settings-custom_outofstock_strings': 'temporalmente agotado'
        },
        follow_redirects=True
    )

    # Should detect as out of stock despite text differences
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))
    assert b'not-in-stock' in res.data, "Should match despite accents, case, and spacing differences"
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,95 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import unittest
|
||||
from changedetectionio.processors.restock_diff.processor import perform_site_check
|
||||
|
||||
|
||||
class TestCustomStringNormalization(unittest.TestCase):
    """Unit tests for the normalization and custom-string helpers of the restock processor."""

    def setUp(self):
        # A fresh processor per test; no datastore is needed by the helpers under test
        self.processor = perform_site_check(datastore=None, watch_uuid='test')

    def test_normalize_text_for_matching(self):
        """_normalize_text_for_matching lowercases, strips accents and collapses whitespace."""

        # (input, expected_output)
        samples = [
            ("Agotado", "agotado"),
            ("AGOTADO", "agotado"),  # Lowercase
            ("Sin stock!", "sin stock!"),  # Normalize whitespace
            ("Pronto\t\nestarán\nen stock", "pronto estaran en stock"),  # Multiple whitespace types + accents
            ("¡Temporalmente AGOTADO!", "¡temporalmente agotado!"),  # Complex case
            ("", ""),  # Empty string
            ("café", "cafe"),  # French accent
            ("naïve", "naive"),  # Multiple accents
        ]

        normalize = self.processor._normalize_text_for_matching
        for raw, wanted in samples:
            with self.subTest(input_text=raw):
                got = normalize(raw)
                self.assertEqual(got, wanted,
                                 f"Failed to normalize '{raw}' -> expected '{wanted}', got '{got}'")

    def test_check_custom_strings_normalization(self):
        """_check_custom_strings matches through case, accent and spacing differences."""

        # (page_text, custom_strings, should_match, description)
        scenarios = [
            ("AGOTADO", "agotado", True, "uppercase to lowercase"),
            ("Agotado", "agotado", True, "single uppercase to lowercase"),
            ("Sin stock!", "sin stock", True, "multiple spaces normalized"),
            ("¡Pronto estarán en stock!", "pronto estaran en stock", True, "accents + spaces"),
            ("TEMPORALMENTE AGOTADO", "temporalmente agotado", True, "multi-word uppercase"),
            ("Available now", "agotado", False, "no match case"),
            ("", "agotado", False, "empty text"),
            ("agotado", "", False, "empty custom strings"),
        ]

        for haystack, needles, expect_hit, label in scenarios:
            with self.subTest(description=label):
                outcome = self.processor._check_custom_strings(haystack, needles, "out-of-stock")

                if not expect_hit:
                    self.assertIsNone(outcome,
                                      f"Expected no match for '{label}': '{haystack}' should not match '{needles}'")
                    continue

                self.assertIsNotNone(outcome,
                                     f"Expected match for '{label}': '{haystack}' should match '{needles}'")

    def test_check_custom_strings_multiline(self):
        """A multi-line block of custom strings is checked one line at a time."""

        page_text = "Product status: TEMPORALMENTE AGOTADO"
        custom_strings = """
sin stock
agotado
temporalmente agotado
"""

        matched = self.processor._check_custom_strings(page_text, custom_strings, "out-of-stock")
        self.assertIsNotNone(matched)
        self.assertEqual(matched.strip(), "temporalmente agotado")

    def test_get_combined_instock_strings_normalization(self):
        """Custom in-stock strings are normalized and appended to the built-in ones."""

        settings = {
            'custom_instock_strings': 'Disponible AHORA\nEn Stock\nDISPONÍBLE'
        }

        combined = self.processor._get_combined_instock_strings(settings)

        # Built-in strings first, then the custom ones in normalized form
        # (the last entry checks that the accent was removed)
        for wanted in ('instock', 'presale', 'disponible ahora', 'en stock', 'disponible'):
            self.assertIn(wanted, combined)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script
if __name__ == '__main__':
    unittest.main()
|
||||
Ładowanie…
Reference in New Issue