kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Restock multiprice improvements (#2698)
rodzic
d39530b261
commit
a278fa22f2
|
@ -27,22 +27,27 @@ def _search_prop_by_value(matches, value):
|
||||||
return prop[1] # Yield the desired value and exit the function
|
return prop[1] # Yield the desired value and exit the function
|
||||||
|
|
||||||
def _deduplicate_prices(data):
    """Return the unique numeric prices found in ``data``.

    Some price data has multiple entries, OR it has a single entry whose value is a list
    such as ['$159', '159', 159, "$ 159"], or just "159". Every value is stripped of
    everything except digits and dots, converted to ``float`` and de-duplicated.

    :param data: iterable of objects exposing a ``.value`` attribute; the value may be a
                 single scalar or a list of scalars.
    :return: list of unique prices as floats, in first-seen order. Values that contain
             nothing numeric (empty string, None, "N/A", ...) are ignored.
    """
    import re

    # A dict is used as an insertion-ordered set so the first price seen stays at index 0
    unique_prices = {}

    for datum in data:
        # Treat a single value as a one-item list so both shapes share the same clean-up path
        raw_values = datum.value if isinstance(datum.value, list) else [datum.value]

        for raw in raw_values:
            cleaned = re.sub(r'[^\d.]', '', str(raw))
            try:
                price = float(cleaned)
            except ValueError:
                # Nothing usable was left after cleaning (e.g. '', '.', '1.2.3'), so it is not a price
                continue
            unique_prices.setdefault(price, None)

    return list(unique_prices)
|
||||||
|
|
||||||
|
|
||||||
# should return Restock()
|
# should return Restock()
|
||||||
|
@ -83,14 +88,13 @@ def get_itemprop_availability(html_content) -> Restock:
|
||||||
if price_result:
|
if price_result:
|
||||||
# Right now, we just support single product items, maybe we will store the whole actual metadata separately in the future and
|
# Right now, we just support single product items, maybe we will store the whole actual metadata separately in the future and
|
||||||
# parse that for the UI?
|
# parse that for the UI?
|
||||||
prices_found = set(str(item.value).replace('$', '') for item in price_result)
|
if len(price_result) > 1 and len(price_result) > 1:
|
||||||
if len(price_result) > 1 and len(prices_found) > 1:
|
|
||||||
# See if all prices are different, in the case that one product has many embedded data types with the same price
|
# See if all prices are different, in the case that one product has many embedded data types with the same price
|
||||||
# One might have $121.95 and another 121.95 etc
|
# One might have $121.95 and another 121.95 etc
|
||||||
logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.")
|
logger.warning(f"More than one price found {price_result}, throwing exception, cant use this plugin.")
|
||||||
raise MoreThanOnePriceFound()
|
raise MoreThanOnePriceFound()
|
||||||
|
|
||||||
value['price'] = price_result[0].value
|
value['price'] = price_result[0]
|
||||||
|
|
||||||
pricecurrency_result = pricecurrency_parse.find(data)
|
pricecurrency_result = pricecurrency_parse.find(data)
|
||||||
if pricecurrency_result:
|
if pricecurrency_result:
|
||||||
|
@ -220,7 +224,7 @@ class perform_site_check(difference_detection_processor):
|
||||||
itemprop_availability['original_price'] = itemprop_availability.get('price')
|
itemprop_availability['original_price'] = itemprop_availability.get('price')
|
||||||
update_obj['restock']["original_price"] = itemprop_availability.get('price')
|
update_obj['restock']["original_price"] = itemprop_availability.get('price')
|
||||||
|
|
||||||
if not self.fetcher.instock_data and not itemprop_availability.get('availability'):
|
if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'):
|
||||||
raise ProcessorException(
|
raise ProcessorException(
|
||||||
message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
|
message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
|
||||||
url=watch.get('url'),
|
url=watch.get('url'),
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
# A list of real world examples!
|
||||||
|
|
||||||
|
The price should always be 155.55 in every example here, because that is the value our tests assert on
|
||||||
|
|
||||||
|
see test_restock_itemprop.py::test_special_prop_examples
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
<div class="PriceSection PriceSection_PriceSection__Vx1_Q PriceSection_variantHuge__P9qxg PdpPriceSection"
|
||||||
|
data-testid="price-section"
|
||||||
|
data-optly-product-tile-price-section="true"><span
|
||||||
|
class="PriceRange ProductPrice variant-huge" itemprop="offers"
|
||||||
|
itemscope="" itemtype="http://schema.org/Offer"><div
|
||||||
|
class="VisuallyHidden_VisuallyHidden__VBD83">$155.55</div><span
|
||||||
|
aria-hidden="true" class="Price variant-huge" data-testid="price"
|
||||||
|
itemprop="price"><sup class="sup" data-testid="price-symbol"
|
||||||
|
itemprop="priceCurrency" content="AUD">$</sup><span
|
||||||
|
class="dollars" data-testid="price-value" itemprop="price"
|
||||||
|
content="155.55">155.55</span><span class="extras"><span class="sup"
|
||||||
|
data-testid="price-sup"></span></span></span></span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script type="application/ld+json">{
|
||||||
|
"@type": "Product",
|
||||||
|
"@context": "https://schema.org",
|
||||||
|
"name": "test",
|
||||||
|
"description": "test",
|
||||||
|
"offers": {
|
||||||
|
"@type": "Offer",
|
||||||
|
"priceCurrency": "AUD",
|
||||||
|
"price": 155.55
|
||||||
|
},
|
||||||
|
}</script>
|
|
@ -3,7 +3,7 @@ import os
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
|
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
|
||||||
from ..notification import default_notification_format
|
from ..notification import default_notification_format
|
||||||
|
|
||||||
instock_props = [
|
instock_props = [
|
||||||
|
@ -413,3 +413,31 @@ def test_data_sanity(client, live_server):
|
||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("edit_page", uuid="first"))
|
url_for("edit_page", uuid="first"))
|
||||||
assert test_url2.encode('utf-8') in res.data
|
assert test_url2.encode('utf-8') in res.data
|
||||||
|
|
||||||
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
|
# All examples should give a price of 155.55
|
||||||
|
def test_special_prop_examples(client, live_server):
    """Check that every real-world snippet in ``itemprop_test_examples/*.txt`` yields the expected price.

    Each example file is wrapped in a minimal HTML document and written to
    ``test-datastore/endpoint-content.txt``, a ``restock_diff`` watch is added for the test
    endpoint URL, and the index page must then contain the price ``155.55`` and no
    exception text.
    """
    import glob

    # NOTE(review): setup call kept disabled exactly as in the original - presumably an
    # earlier test in this module has already configured the live server; confirm.
    #live_server_setup(live_server)

    test_url = url_for('test_endpoint', _external=True)
    check_path = os.path.join(os.path.dirname(__file__), "itemprop_test_examples", "*.txt")
    # Sorted so the examples always run in the same order
    files = sorted(glob.glob(check_path))
    assert files, f"No example files found matching {check_path}"

    for test_example_filename in files:
        with open(test_example_filename, 'r', encoding='utf-8') as example_f:
            example_html = example_f.read()

        with open("test-datastore/endpoint-content.txt", "w", encoding='utf-8') as test_f:
            test_f.write(f"<html><body>{example_html}</body></html>")

        # Now fetch it and check the price worked
        client.post(
            url_for("form_quick_watch_add"),
            data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
            follow_redirects=True
        )
        wait_for_all_checks(client)
        res = client.get(url_for("index"))
        assert b'ception' not in res.data, f"Exception shown for {test_example_filename}"
        assert b'155.55' in res.data, f"Expected price not found for {test_example_filename}"

        # Remove the watch again so the next example cannot pass on this example's price
        res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
        assert b'Deleted' in res.data
|
||||||
|
|
Ładowanie…
Reference in New Issue