// OpenAI SDK classes used by performGPTAnalysis to build the API client.
const { Configuration, OpenAIApi } = require('openai');

// Thin wrapper that loads node-fetch through a dynamic import() on each call
// and forwards all arguments to its default export. A dynamic import is used
// instead of require() — presumably because the installed node-fetch version
// is ESM-only; confirm against package.json.
const fetch = (...args) => import('node-fetch').then(({default: fetch}) => fetch(...args));

/**
 * Download the body of a URL as text with a plain HTTP request.
 *
 * @param {string} url - Address to request; passed to `fetch` unchanged.
 * @returns {Promise<string>} The response body decoded as text.
 * @throws {Error} When the request fails or the response status is not OK.
 *   The error is logged and then rethrown so the caller decides how to react.
 */
async function fetchContentFromURL(url) {
  try {
    const response = await fetch(url);

    // fetch only rejects on network failures; HTTP error statuses have to be
    // turned into errors explicitly.
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    return await response.text();
  } catch (error) {
    console.error(`Could not fetch content from URL: ${error}`);
    throw error;
  }
}

/**
 * Reduce an HTML document to compact plain text suitable for a prompt.
 *
 * Steps, in order:
 *  1. drop <script> and <style> elements together with their content;
 *  2. collapse source whitespace (it carries no meaning in HTML);
 *  3. turn <h1>-<h6> headings into "### Heading" on a line of their own;
 *  4. strip every remaining tag, keeping the inner text;
 *  5. decode the most common HTML entities;
 *  6. tidy spaces and blank lines.
 *
 * @param {string} content - Raw HTML.
 * @returns {string} Simplified text; an empty string for empty or non-string input.
 */
function simplifyContent(content) {
  if (typeof content !== 'string' || content === '') {
    return '';
  }

  const htmlEntities = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
    '#39': "'",
  };

  let text = content
    // Case-insensitive and dot-all so <SCRIPT> and multi-line bodies match.
    .replace(/<script\b[^>]*>.*?<\/script\s*>/gis, '')
    .replace(/<style\b[^>]*>.*?<\/style\s*>/gis, '');

  // Collapse whitespace BEFORE inserting heading line breaks, otherwise the
  // line breaks added for headings would be flattened again.
  text = text.replace(/\s+/g, ' ');

  // The back-reference makes sure <h2> is closed by </h2>, not by any </hN>.
  text = text.replace(/<h([1-6])\b[^>]*>(.*?)<\/h\1\s*>/gi, '\n### $2\n');

  // Remove all remaining tags (attributes such as style="" or onclick=""
  // disappear with them, so no separate attribute pass is needed).
  text = text.replace(/<[^>]+>/g, '');

  // Decode entities in a single pass so "&amp;lt;" becomes "&lt;" and is not
  // decoded a second time into "<".
  text = text.replace(/&(amp|lt|gt|quot|apos|#39);/g, (match, name) => htmlEntities[name]);

  return text
    .replace(/[^\S\n]+/g, ' ') // runs of spaces/tabs left behind by removed tags
    .replace(/ ?\n ?/g, '\n') // no stray spaces around line breaks
    .replace(/\n{2,}/g, '\n') // condense multiple line breaks into one
    .trim();
}

/**
 * Ask the "mistralai/mistral-7b-instruct" model, through OpenRouter's
 * OpenAI-compatible endpoint, for the media type and key topics of a text.
 *
 * @param {string} simplifiedContent - Plain-text content to analyse; it is
 *   appended verbatim to the prompt.
 * @param {string} apiKey - API key sent with the request.
 * @returns {Promise<string>} The model's answer with surrounding whitespace
 *   removed, or "article" when the response contains no text.
 * @throws Rethrows any error raised while building the client or calling the
 *   API, after logging it.
 */
async function performGPTAnalysis(simplifiedContent, apiKey) {
  // Used when the API answers without any usable text.
  const fallbackMediaType = "article";

  try {
    const configuration = new Configuration({
      apiKey,
      // The Configuration class reads the endpoint from `basePath`; a
      // `baseURL` key is ignored and requests go to the SDK's default host.
      basePath: "https://openrouter.ai/api/v1",
    });
    const openai = new OpenAIApi(configuration);

    const prompt = `Analyze the following text and provide the media type and key topics: ${simplifiedContent}`;

    const completion = await openai.createCompletion({
      model: "mistralai/mistral-7b-instruct",
      prompt,
      max_tokens: 150, // Adjust as needed
    });

    const answer = completion?.data?.choices?.[0]?.text;
    if (typeof answer === "string" && answer.trim() !== "") {
      return answer.trim();
    }
    return fallbackMediaType;
  } catch (error) {
    // Log only the message: HTTP client errors may carry the full request
    // configuration, including the Authorization header — TODO confirm.
    console.error('Error with OpenAI completion:', error?.message ?? error);
    throw error;
  }
}

/**
 * Map an inferred media type and topic list onto the predefined taxonomy.
 *
 * Placeholder: both arguments are currently ignored and fixed values are
 * returned. TODO: implement the real matching against the taxonomy.
 *
 * @param {string} mediaType - Inferred media type (currently unused).
 * @param {string[]} topics - Inferred topics (currently unused).
 * @returns {{ predefinedMediaType: string, predefinedTopics: string[] }}
 *   Always "Article" with the topics "Topic 1" and "Topic 2".
 */
function mapInferredValues(mediaType, topics) {
  const predefinedMediaType = "Article";
  const predefinedTopics = ["Topic 1", "Topic 2"];

  return { predefinedMediaType, predefinedTopics };
}

/**
 * Build the response payload from the mapped metadata.
 *
 * @param {string} predefinedMediaType - Value stored under `format`.
 * @param {string[]} predefinedTopics - Value stored under `topics` (not copied).
 * @returns {{ format: string, topics: string[] }} The response object.
 */
function formatResponse(predefinedMediaType, predefinedTopics) {
  return {
    format: predefinedMediaType,
    topics: predefinedTopics,
  };
}

/**
 * Tell whether a value parses as an absolute http: or https: URL.
 * Note: this does not block internal or private hosts.
 */
function isHttpUrl(value) {
  try {
    const { protocol } = new URL(value);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    return false;
  }
}

/**
 * Request entry point: fetch a page, simplify it and run the GPT analysis.
 *
 * The `{ statusCode, body }` result shape suggests a Lambda/Netlify-style
 * function — TODO confirm against the deployment configuration.
 *
 * @param {{ body: string }} event - Request whose `body` is a JSON string
 *   containing `url` and `apiKey`.
 * @returns {Promise<{ statusCode: number, body: string }>}
 *   200 with the JSON-encoded analysis text;
 *   400 when the body is not valid JSON, a field is missing, or the URL is
 *   not an absolute http(s) URL;
 *   500 when fetching or analysing fails.
 */
async function handler(event) {
  // Bad input is the caller's fault, so it is reported as 400 rather than
  // being swallowed by the generic 500 handler below.
  let payload;
  try {
    payload = JSON.parse(event.body);
  } catch (parseError) {
    return {
      statusCode: 400,
      body: JSON.stringify({ error: 'Request body must be valid JSON' }),
    };
  }

  // JSON.parse can legitimately return null (for the body "null").
  const { url, apiKey } = payload ?? {};

  if (!url || !apiKey) {
    return {
      statusCode: 400,
      body: JSON.stringify({ error: 'URL and API Key are required' }),
    };
  }

  // The URL comes straight from the request; reject anything that is not a
  // web address before attempting to fetch it.
  if (!isHttpUrl(url)) {
    return {
      statusCode: 400,
      body: JSON.stringify({ error: 'URL must be a valid http(s) URL' }),
    };
  }

  try {
    // Step 1: Fetch content from the URL
    const fetchedContent = await fetchContentFromURL(url);

    // Step 2: Simplify the fetched content for GPT analysis
    const simplifiedContent = simplifyContent(fetchedContent);

    // Step 3: Perform GPT analysis for media type and topics
    const responseText = await performGPTAnalysis(simplifiedContent, apiKey);

    // TODO: Steps 4 and 5 (mapInferredValues / formatResponse) are not wired
    // in yet; the raw analysis text is returned instead.
    return {
      statusCode: 200,
      body: JSON.stringify(responseText),
    };
  } catch (error) {
    console.error('Error occurred:', error.message);
    return {
      statusCode: 500,
      body: JSON.stringify({ error: 'Something went wrong', details: error.message }),
    };
  }
}

// Public surface of this module: only the request handler is exported.
module.exports = { handler };