Fetching - Small memory-handling improvement when detecting price information (saves ~10 MB)

pull/3103/head^2
dgtlmoon 2025-04-11 10:11:59 +02:00 zatwierdzone przez GitHub
rodzic 4269079c54
commit 5f43d988a3
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: B5690EEEBB952194
1 zmienionych plików z 4 dodań i 2 usunięć

Wyświetl plik

@@ -477,8 +477,10 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
def has_ldjson_product_info(content):
try:
lc = content.lower()
if 'application/ld+json' in lc and lc.count('"price"') == 1 and '"pricecurrency"' in lc:
# Better than .lower() which can use a lot of ram
if (re.search(r'application/ld\+json', content, re.IGNORECASE) and
re.search(r'"price"', content, re.IGNORECASE) and
re.search(r'"pricecurrency"', content, re.IGNORECASE)):
return True
# On some pages this is really terribly expensive when they don't really need it