kopia lustrzana https://github.com/learn-awesome/learndb
Updated package.json to set "type": "module"; replaced the node-fetch ES import with a CommonJS-style require
rodzic
9cd0345098
commit
a603edd790
|
@ -1,6 +1,7 @@
|
||||||
const https = require('https'); // Import for webscraping (fetchContentFromURL(url) function
|
const https = require('https'); // Import for webscraping (fetchContentFromURL(url) function
|
||||||
import { OpenAIApi, Configuration } from 'openai';
|
import { OpenAIApi, Configuration } from 'openai';
|
||||||
import { fetch } from 'node-fetch';
|
// import { fetch } from 'node-fetch';
|
||||||
|
const fetch = require('node-fetch');
|
||||||
|
|
||||||
// Function to fetch content from URL using a web scraping service
|
// Function to fetch content from URL using a web scraping service
|
||||||
async function fetchContentFromURL(url) {
|
async function fetchContentFromURL(url) {
|
||||||
|
@ -18,22 +19,23 @@ async function fetchContentFromURL(url) {
|
||||||
|
|
||||||
function simplifyContent(content) {
|
function simplifyContent(content) {
|
||||||
// Remove HTML tags
|
// Remove HTML tags
|
||||||
let simplifiedContent = content.replace(/<[^>]*>/g, '');
|
// let simplifiedContent = content.replace(/<[^>]*>/g, '');
|
||||||
// Remove CSS within style tags
|
// // Remove CSS within style tags
|
||||||
simplifiedContent = simplifiedContent.replace(/<style[^>]*>.*<\/style>/gms, '');
|
// simplifiedContent = simplifiedContent.replace(/<style[^>]*>.*<\/style>/gms, '');
|
||||||
// Remove inline CSS and JavaScript within script tags
|
// // Remove inline CSS and JavaScript within script tags
|
||||||
simplifiedContent = simplifiedContent.replace(/<script[^>]*>.*<\/script>/gms, '');
|
// simplifiedContent = simplifiedContent.replace(/<script[^>]*>.*<\/script>/gms, '');
|
||||||
// Remove special characters and HTML entities
|
// // Remove special characters and HTML entities
|
||||||
simplifiedContent = simplifiedContent.replace(/[^\w\s]/gi, '').replace(/&[a-z]+;/gi, '');
|
// simplifiedContent = simplifiedContent.replace(/[^\w\s]/gi, '').replace(/&[a-z]+;/gi, '');
|
||||||
// Remove URLs
|
// // Remove URLs
|
||||||
simplifiedContent = simplifiedContent.replace(/https?:\/\/[^\s]+/gi, '');
|
// simplifiedContent = simplifiedContent.replace(/https?:\/\/[^\s]+/gi, '');
|
||||||
// Normalize whitespace
|
// // Normalize whitespace
|
||||||
simplifiedContent = simplifiedContent.replace(/\s+/g, ' ').trim();
|
// simplifiedContent = simplifiedContent.replace(/\s+/g, ' ').trim();
|
||||||
// Basic language simplification
|
// // Basic language simplification
|
||||||
simplifiedContent = simplifiedContent.toLowerCase();
|
// simplifiedContent = simplifiedContent.toLowerCase();
|
||||||
// // Simple summarization: taking the first few sentences
|
// // Simple summarization: taking the first few sentences
|
||||||
// const sentences = simplifiedContent.split('. ');
|
// const sentences = simplifiedContent.split('. ');
|
||||||
// const summarizedContent = sentences.slice(0, Math.min(5, sentences.length)).join('. ');
|
// const summarizedContent = sentences.slice(0, Math.min(5, sentences.length)).join('. ');
|
||||||
|
simplifiedContent = "hello maria";
|
||||||
return simplifiedContent;
|
return simplifiedContent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"description": "A Netlify function to handle metadata extraction and analysis",
|
"description": "A Netlify function to handle metadata extraction and analysis",
|
||||||
"main": "handleMetadata.js",
|
"main": "handleMetadata.js",
|
||||||
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"start": "node handleMetadata.js"
|
"start": "node handleMetadata.js"
|
||||||
},
|
},
|
||||||
|
|
Ładowanie…
Reference in New Issue