modified simplifyContent()

pull/73/head
Huda Joad 2023-11-25 00:10:29 +03:00
rodzic 9cdf9e6e45
commit 259e45451f
1 zmienionych plików z 7 dodań i 20 usunięć

Wyświetl plik

@ -1,6 +1,6 @@
const fetch = require('node-fetch'); // Import for webscraping in fetchContentFromURL()
// import { OpenAIApi, Configuration } from 'openai';
const { Configuration, OpenAIApi } = require('openai');
import { OpenAIApi, Configuration } from 'openai';
// const { Configuration, OpenAIApi } = require('openai');
// Function to fetch content from URL using a web scraping service
async function fetchContentFromURL(url) {
@ -17,24 +17,11 @@ async function fetchContentFromURL(url) {
}
/**
 * Reduce a chunk of HTML to plain, whitespace-normalized text.
 *
 * Steps, in order:
 *   1. drop <style> and <script> elements together with their contents,
 *   2. drop every remaining HTML tag,
 *   3. drop every character that is not a word character or whitespace,
 *   4. collapse whitespace runs to a single space and trim the ends.
 *
 * @param {string} content - Raw HTML (or plain text) to simplify.
 * @returns {string} The simplified text; '' when content is null/undefined.
 */
function simplifyContent(content) {
  // Nothing to simplify; avoids a TypeError on `.replace` for nullish input.
  if (content == null) {
    return '';
  }

  return String(content)
    // <style>/<script> blocks must go BEFORE the generic tag strip: once the
    // tags themselves are removed these patterns can no longer match and the
    // CSS/JS source would leak into the output. The lazy `[\s\S]*?` stops at
    // the nearest closing tag so text between two separate blocks is kept.
    .replace(/<style\b[^>]*>[\s\S]*?<\/style\s*>/gi, '')
    .replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '')
    // Remove the remaining HTML tags.
    .replace(/<[^>]*>/g, '')
    // Remove special characters (anything that is not \w or whitespace).
    .replace(/[^\w\s]/g, '')
    // Normalize whitespace.
    .replace(/\s+/g, ' ')
    .trim();
}