kopia lustrzana https://github.com/learn-awesome/learndb
edited comments
rodzic
5145b0fe3e
commit
c46f1b0f28
|
@ -1,7 +1,6 @@
|
||||||
const fetch = require('node-fetch'); // Import for webscraping in fetchContentFromURL()
|
const fetch = require('node-fetch'); // Import for webscraping in fetchContentFromURL()
|
||||||
import { OpenAIApi, Configuration } from 'openai';
|
import { OpenAIApi, Configuration } from 'openai';
|
||||||
// const { Configuration, OpenAIApi } = require('openai');
|
// const { Configuration, OpenAIApi } = require('openai');
|
||||||
// import { he } from 'he';
|
|
||||||
|
|
||||||
// Function to fetch content from URL using a web scraping service
|
// Function to fetch content from URL using a web scraping service
|
||||||
async function fetchContentFromURL(url) {
|
async function fetchContentFromURL(url) {
|
||||||
|
@ -25,13 +24,7 @@ function simplifyContent(content) {
|
||||||
simplifiedContent = simplifiedContent.replace(/<style.*?>.*?<\/style>/gms, '');
|
simplifiedContent = simplifiedContent.replace(/<style.*?>.*?<\/style>/gms, '');
|
||||||
// Remove all remaining HTML tags, leaving the inner text
|
// Remove all remaining HTML tags, leaving the inner text
|
||||||
simplifiedContent = simplifiedContent.replace(/<[^>]+>/g, '');
|
simplifiedContent = simplifiedContent.replace(/<[^>]+>/g, '');
|
||||||
// // Decode HTML entities - for a Node.js environment, consider using a library like 'he'
|
// Manually replace common HTML entities
|
||||||
// simplifiedContent = simplifiedContent.replace(/&[a-z]+;/gi, match => {
|
|
||||||
// // This part is for browser environments, adjust for Node.js if necessary
|
|
||||||
// const span = document.createElement('span');
|
|
||||||
// span.innerHTML = match;
|
|
||||||
// return span.textContent || span.innerText;
|
|
||||||
// });
|
|
||||||
simplifiedContent = simplifiedContent
|
simplifiedContent = simplifiedContent
|
||||||
.replace(/&amp;/g, '&')
|
.replace(/&amp;/g, '&')
|
||||||
.replace(/&lt;/g, '<')
|
.replace(/&lt;/g, '<')
|
||||||
|
|
Ładowanie…
Reference in New Issue