kopia lustrzana https://github.com/learn-awesome/learndb
modified simplifyContent()
rodzic
9cdf9e6e45
commit
259e45451f
|
@ -1,6 +1,6 @@
|
|||
const fetch = require('node-fetch'); // Import for webscraping in fetchContentFromURL()
|
||||
// import { OpenAIApi, Configuration } from 'openai';
|
||||
const { Configuration, OpenAIApi } = require('openai');
|
||||
import { OpenAIApi, Configuration } from 'openai';
|
||||
// const { Configuration, OpenAIApi } = require('openai');
|
||||
|
||||
// Function to fetch content from URL using a web scraping service
|
||||
async function fetchContentFromURL(url) {
|
||||
|
@ -17,24 +17,11 @@ async function fetchContentFromURL(url) {
|
|||
}
|
||||
|
||||
/**
 * Reduce an HTML document to plain, whitespace-normalized text.
 *
 * Processing order matters: `<style>` and `<script>` elements are removed
 * together with their contents first, then any remaining tags are stripped,
 * then punctuation is dropped and whitespace is collapsed.
 *
 * @param {string} content - Raw HTML (or plain text) to simplify.
 * @returns {string} Text containing only word characters (`[A-Za-z0-9_]`)
 *   separated by single spaces; an empty string when `content` is null or
 *   undefined.
 */
function simplifyContent(content) {
  // Treat a missing document as empty instead of throwing a TypeError.
  if (content == null) {
    return '';
  }

  return (
    String(content)
      // Remove CSS and JS blocks *before* generic tag stripping; once the
      // tags are gone these patterns can no longer match and the CSS/JS
      // source would leak into the result. The lazy `[\s\S]*?` stops at the
      // first closing tag so text between two separate blocks is preserved.
      .replace(/<style\b[^>]*>[\s\S]*?<\/style\s*>/gi, '')
      .replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '')
      // Strip every remaining HTML tag, keeping the text between tags.
      .replace(/<[^>]*>/g, '')
      // Remove special characters. NOTE: `\w` is ASCII-only, so accented and
      // other non-ASCII letters are removed as well.
      .replace(/[^\w\s]/g, '')
      // Normalize whitespace: collapse runs to one space and trim the ends.
      .replace(/\s+/g, ' ')
      .trim()
  );
}
|
||||
|
||||
|
|
Ładowanie…
Reference in New Issue