slightly modified simplifyContent() to preserve more tags

pull/76/head
Huda Joad 2023-12-04 23:57:46 +03:00
rodzic d9735112c5
commit ded050d616
1 zmienionych plików z 2 dodań i 6 usunięć

Wyświetl plik

@@ -20,9 +20,6 @@ function simplifyContent(content) {
// Preserve the title tag and its content
let title = content.match(/<title.*?>(.*?)<\/title>/i);
title = title ? title[1] : '';
// Preserve the body tag and its content
let body = content.match(/<body.*?>(.*?)<\/body>/i);
body = body ? body[1] : '';
// Extract the body content, if present
let bodyContent = '';
@@ -38,8 +35,8 @@ function simplifyContent(content) {
bodyContent = bodyContent.replace(/<script.*?>.*?<\/script>/gms, '');
bodyContent = bodyContent.replace(/<style.*?>.*?<\/style>/gms, '');
// Remove all remaining HTML tags, except for title and body
bodyContent = bodyContent.replace(/<(?!\/?title|\/?body)([^>]+)>/g, '');
// Remove all remaining HTML tags, except for title, body, h1-h6, p, and a
bodyContent = bodyContent.replace(/<(?!\/?(title|body|h[1-6]|p|a)( [^>]*)?>)([^>]+)>/g, '');
// Manually replace common HTML entities
bodyContent = bodyContent
@@ -64,7 +61,6 @@ function simplifyContent(content) {
return simplifiedContent;
}
// Placeholder function to perform GPT analysis for media type and topics using Mistral-7b via OpenRouter
async function performGPTAnalysis(simplifiedContent, apiKey) {
// Implement logic to send content to Mistral-7b via OpenRouter for GPT analysis