From 259e45451fa8b16b0953d3fefd424c1d43d469d8 Mon Sep 17 00:00:00 2001 From: Huda Joad Date: Sat, 25 Nov 2023 00:10:29 +0300 Subject: [PATCH] modified simplifyContent() --- netlify/functions/handleMetadata.js | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/netlify/functions/handleMetadata.js b/netlify/functions/handleMetadata.js index 33ca3b2..cb34a09 100644 --- a/netlify/functions/handleMetadata.js +++ b/netlify/functions/handleMetadata.js @@ -1,6 +1,6 @@ const fetch = require('node-fetch'); // Import for webscraping in fetchContentFromURL() -// import { OpenAIApi, Configuration } from 'openai'; -const { Configuration, OpenAIApi } = require('openai'); +import { OpenAIApi, Configuration } from 'openai'; +// const { Configuration, OpenAIApi } = require('openai'); // Function to fetch content from URL using a web scraping service async function fetchContentFromURL(url) { @@ -17,24 +17,11 @@ async function fetchContentFromURL(url) { } function simplifyContent(content) { - // Remove HTML tags - // let simplifiedContent = content.replace(/<[^>]*>/g, ''); - // // Remove CSS within style tags - // simplifiedContent = simplifiedContent.replace(/]*>.*<\/style>/gms, ''); - // // Remove inline CSS and JavaScript within script tags - // simplifiedContent = simplifiedContent.replace(/]*>.*<\/script>/gms, ''); - // // Remove special characters and HTML entities - // simplifiedContent = simplifiedContent.replace(/[^\w\s]/gi, '').replace(/&[a-z]+;/gi, ''); - // // Remove URLs - // simplifiedContent = simplifiedContent.replace(/https?:\/\/[^\s]+/gi, ''); - // // Normalize whitespace - // simplifiedContent = simplifiedContent.replace(/\s+/g, ' ').trim(); - // // Basic language simplification - // simplifiedContent = simplifiedContent.toLowerCase(); - // // Simple summarization: taking the first few sentences - // const sentences = simplifiedContent.split('. '); - // const summarizedContent = sentences.slice(0, Math.min(5, sentences.length)).join('. '); - simplifiedContent = "hello maria"; + let simplifiedContent = content.replace(/<[^>]*>/g, ''); // Remove HTML tags + simplifiedContent = simplifiedContent.replace(/]*>.*<\/style>/gms, ''); // Remove CSS + simplifiedContent = simplifiedContent.replace(/]*>.*<\/script>/gms, ''); // Remove JS + simplifiedContent = simplifiedContent.replace(/[^\w\s]/gi, ''); // Remove special characters + simplifiedContent = simplifiedContent.replace(/\s+/g, ' ').trim(); // Normalize whitespace return simplifiedContent; }