Restock detection - Check all elements for text to get stock status from, only consider elements inside the viewport, only consider elements more than 100px from the top (avoid menu), trim any text returned (#2040)

pull/2107/head
dgtlmoon 2024-01-12 23:11:56 +01:00 zatwierdzone przez GitHub
rodzic 65428655b8
commit 1749c07750
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
2 zmienionych plików z 123 dodań i 109 usunięć

Wyświetl plik

@ -61,4 +61,4 @@ class perform_site_check(difference_detection_processor):
# Always record the new checksum # Always record the new checksum
update_obj["previous_md5"] = fetched_md5 update_obj["previous_md5"] = fetched_md5
return changed_detected, update_obj, self.fetcher.instock_data.encode('utf-8') return changed_detected, update_obj, self.fetcher.instock_data.encode('utf-8').strip()

Wyświetl plik

@ -1,117 +1,131 @@
function isItemInStock() { function isItemInStock() {
// @todo Pass these in so the same list can be used in non-JS fetchers // @todo Pass these in so the same list can be used in non-JS fetchers
const outOfStockTexts = [ const outOfStockTexts = [
' أخبرني عندما يتوفر', ' أخبرني عندما يتوفر',
'0 in stock', '0 in stock',
'agotado', 'agotado',
'article épuisé', 'article épuisé',
'artikel zurzeit vergriffen', 'artikel zurzeit vergriffen',
'as soon as stock is available', 'as soon as stock is available',
'ausverkauft', // sold out 'ausverkauft', // sold out
'available for back order', 'available for back order',
'back-order or out of stock', 'back-order or out of stock',
'backordered', 'backordered',
'benachrichtigt mich', // notify me 'benachrichtigt mich', // notify me
'brak na stanie', 'brak na stanie',
'brak w magazynie', 'brak w magazynie',
'coming soon', 'coming soon',
'currently have any tickets for this', 'currently have any tickets for this',
'currently unavailable', 'currently unavailable',
'dostępne wkrótce', 'dostępne wkrótce',
'en rupture de stock', 'en rupture de stock',
'ist derzeit nicht auf lager', 'ist derzeit nicht auf lager',
'item is no longer available', 'item is no longer available',
'let me know when it\'s available', 'let me know when it\'s available',
'message if back in stock', 'message if back in stock',
'nachricht bei', 'nachricht bei',
'nicht auf lager', 'nicht auf lager',
'nicht lieferbar', 'nicht lieferbar',
'nicht zur verfügung', 'nicht zur verfügung',
'niet beschikbaar', 'niet beschikbaar',
'niet leverbaar', 'niet leverbaar',
'no disponible temporalmente', 'no disponible temporalmente',
'no longer in stock', 'no longer in stock',
'no tickets available', 'no tickets available',
'not available', 'not available',
'not currently available', 'not currently available',
'not in stock', 'not in stock',
'notify me when available', 'notify me when available',
'não estamos a aceitar encomendas', 'não estamos a aceitar encomendas',
'out of stock', 'out of stock',
'out-of-stock', 'out-of-stock',
'produkt niedostępny', 'produkt niedostępny',
'sold out', 'sold out',
'sold-out', 'sold-out',
'temporarily out of stock', 'temporarily out of stock',
'temporarily unavailable', 'temporarily unavailable',
'tickets unavailable', 'tickets unavailable',
'tijdelijk uitverkocht', 'tijdelijk uitverkocht',
'unavailable tickets', 'unavailable tickets',
'we do not currently have an estimate of when this product will be back in stock.', 'we do not currently have an estimate of when this product will be back in stock.',
'zur zeit nicht an lager', 'we don\'t know when or if this item will be back in stock.',
'品切れ', 'zur zeit nicht an lager',
'已售完', '品切れ',
'품절' '已售完',
]; '품절'
];
function getElementBaseText(element) {
const negateOutOfStockRegexs = [ // .textContent can include text from children which may give the wrong results
'[0-9] in stock' // scan only immediate TEXT_NODEs, which will be a child of the element
] var text = "";
var negateOutOfStockRegexs_r = []; for (var i = 0; i < element.childNodes.length; ++i)
for (let i = 0; i < negateOutOfStockRegexs.length; i++) { if (element.childNodes[i].nodeType === Node.TEXT_NODE)
negateOutOfStockRegexs_r.push(new RegExp(negateOutOfStockRegexs[0], 'g')); text += element.childNodes[i].textContent;
} return text.toLowerCase().trim();
const elementsWithZeroChildren = Array.from(document.getElementsByTagName('*')).filter(element => element.children.length === 0);
// REGEXS THAT REALLY MEAN IT'S IN STOCK
for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) {
const element = elementsWithZeroChildren[i];
if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) {
var elementText="";
if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase();
} else {
elementText = element.textContent.toLowerCase();
}
if (elementText.length) {
// try which ones could mean its in stock
for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
if (negateOutOfStockRegexs_r[i].test(elementText)) {
return 'Possibly in stock';
}
}
}
} }
}
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK const negateOutOfStockRegexs = [
for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) { '[0-9] in stock'
const element = elementsWithZeroChildren[i]; ]
if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) { var negateOutOfStockRegexs_r = [];
var elementText=""; for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
if (element.tagName.toLowerCase() === "input") { negateOutOfStockRegexs_r.push(new RegExp(negateOutOfStockRegexs[0], 'g'));
elementText = element.value.toLowerCase();
} else {
elementText = element.textContent.toLowerCase();
}
if (elementText.length) {
// and these mean its out of stock
for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) {
return elementText; // item is out of stock
}
}
}
} }
}
return 'Possibly in stock'; // possibly in stock, cant decide otherwise. // The out-of-stock or in-stock-text is generally always above-the-fold
// and often below-the-fold is a list of related products that may or may not contain trigger text
// so it's good to filter to just the 'above the fold' elements
// and it should be at least 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
const elementsToScan = Array.from(document.getElementsByTagName('*')).filter(element => element.getBoundingClientRect().top + window.scrollY <= window.innerHeight && element.getBoundingClientRect().top + window.scrollY >= 100);
var elementText = "";
// REGEXS THAT REALLY MEAN IT'S IN STOCK
for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i];
elementText = "";
if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase();
} else {
elementText = getElementBaseText(element);
}
if (elementText.length) {
// try which ones could mean its in stock
for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
if (negateOutOfStockRegexs_r[i].test(elementText)) {
return 'Possibly in stock';
}
}
}
}
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i];
if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) {
elementText = "";
if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase();
} else {
elementText = getElementBaseText(element);
}
if (elementText.length) {
// and these mean its out of stock
for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) {
return outOfStockText; // item is out of stock
}
}
}
}
}
return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
} }
// returns the element text that makes it think it's out of stock // returns the element text that makes it think it's out of stock
return isItemInStock(); return isItemInStock().trim()