kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
feat: add ChatGPTAPIBrowser for increased robustness; less efficient but fewer 429/403/503 errors
rodzic
cd5a12641e
commit
120d7e05db
|
@ -1,7 +1,7 @@
|
|||
import dotenv from 'dotenv-safe'
|
||||
import { oraPromise } from 'ora'
|
||||
|
||||
import { ChatGPTAPI, getOpenAIAuth } from '../src'
|
||||
import { ChatGPTAPIBrowser } from '../src'
|
||||
|
||||
dotenv.config()
|
||||
|
||||
|
@ -16,13 +16,9 @@ async function main() {
|
|||
const email = process.env.OPENAI_EMAIL
|
||||
const password = process.env.OPENAI_PASSWORD
|
||||
|
||||
const authInfo = await getOpenAIAuth({
|
||||
email,
|
||||
password
|
||||
})
|
||||
|
||||
const api = new ChatGPTAPI({ ...authInfo })
|
||||
await api.ensureAuth()
|
||||
const api = new ChatGPTAPIBrowser({ email, password })
|
||||
const res = await api.init()
|
||||
console.log('init result', res)
|
||||
|
||||
const prompt =
|
||||
'Write a python version of bubble sort. Do not include example usage.'
|
||||
|
@ -31,6 +27,7 @@ async function main() {
|
|||
text: prompt
|
||||
})
|
||||
|
||||
await api.close()
|
||||
return response
|
||||
}
|
||||
|
||||
|
|
|
@ -38,8 +38,10 @@
|
|||
"delay": "^5.0.0",
|
||||
"eventsource-parser": "^0.0.5",
|
||||
"expiry-map": "^2.0.0",
|
||||
"html-to-md": "^0.8.3",
|
||||
"p-timeout": "^6.0.0",
|
||||
"puppeteer-extra": "^3.3.4",
|
||||
"puppeteer-extra-plugin-recaptcha": "^3.6.6",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.1",
|
||||
"remark": "^14.0.2",
|
||||
"strip-markdown": "^5.0.0",
|
||||
|
|
|
@ -10,6 +10,7 @@ specifiers:
|
|||
dotenv-safe: ^8.2.0
|
||||
eventsource-parser: ^0.0.5
|
||||
expiry-map: ^2.0.0
|
||||
html-to-md: ^0.8.3
|
||||
husky: ^8.0.2
|
||||
lint-staged: ^13.0.3
|
||||
npm-run-all: ^4.1.5
|
||||
|
@ -18,6 +19,7 @@ specifiers:
|
|||
prettier: ^2.8.0
|
||||
puppeteer: ^19.4.0
|
||||
puppeteer-extra: ^3.3.4
|
||||
puppeteer-extra-plugin-recaptcha: ^3.6.6
|
||||
puppeteer-extra-plugin-stealth: ^2.11.1
|
||||
remark: ^14.0.2
|
||||
strip-markdown: ^5.0.0
|
||||
|
@ -32,8 +34,10 @@ dependencies:
|
|||
delay: 5.0.0
|
||||
eventsource-parser: 0.0.5
|
||||
expiry-map: 2.0.0
|
||||
html-to-md: 0.8.3
|
||||
p-timeout: 6.0.0
|
||||
puppeteer-extra: 3.3.4_puppeteer@19.4.0
|
||||
puppeteer-extra-plugin-recaptcha: 3.6.6_puppeteer-extra@3.3.4
|
||||
puppeteer-extra-plugin-stealth: 2.11.1_puppeteer-extra@3.3.4
|
||||
remark: 14.0.2
|
||||
strip-markdown: 5.0.0
|
||||
|
@ -1789,6 +1793,10 @@ packages:
|
|||
lru-cache: 6.0.0
|
||||
dev: true
|
||||
|
||||
/html-to-md/0.8.3:
|
||||
resolution: {integrity: sha512-Va+bB1YOdD6vMRDue9/l7YxbERgwOgsos4erUDRfRN6YE0B2Wbbw8uAj6xZJk9A9vrjVy7mG/WLlhDw6RXfgsA==}
|
||||
dev: false
|
||||
|
||||
/https-proxy-agent/5.0.1:
|
||||
resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==}
|
||||
engines: {node: '>= 6'}
|
||||
|
@ -3065,6 +3073,26 @@ packages:
|
|||
- supports-color
|
||||
- utf-8-validate
|
||||
|
||||
/puppeteer-extra-plugin-recaptcha/3.6.6_puppeteer-extra@3.3.4:
|
||||
resolution: {integrity: sha512-SVbmL+igGX8m0Qg9dn85trWDghbfUCTG/QUHYscYx5XgMZVVb0/v0a6MqbPdHoKmBx5BS2kLd6rorMlncMcXdw==}
|
||||
engines: {node: '>=9.11.2'}
|
||||
peerDependencies:
|
||||
playwright-extra: '*'
|
||||
puppeteer-extra: '*'
|
||||
peerDependenciesMeta:
|
||||
playwright-extra:
|
||||
optional: true
|
||||
puppeteer-extra:
|
||||
optional: true
|
||||
dependencies:
|
||||
debug: 4.3.4
|
||||
merge-deep: 3.0.3
|
||||
puppeteer-extra: 3.3.4_puppeteer@19.4.0
|
||||
puppeteer-extra-plugin: 3.2.2_puppeteer-extra@3.3.4
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
dev: false
|
||||
|
||||
/puppeteer-extra-plugin-stealth/2.11.1_puppeteer-extra@3.3.4:
|
||||
resolution: {integrity: sha512-n0wdC0Ilc9tk5L6FWLyd0P2gT8b2fp+2NuB+KB0oTSw3wXaZ0D6WNakjJsayJ4waGzIJFCUHkmK9zgx5NKMoFw==}
|
||||
engines: {node: '>=8'}
|
||||
|
|
|
@ -0,0 +1,236 @@
|
|||
import delay from 'delay'
|
||||
import html2md from 'html-to-md'
|
||||
import { type Browser, type HTTPResponse, type Page } from 'puppeteer'
|
||||
|
||||
import * as types from './types'
|
||||
import { getBrowser, getOpenAIAuth } from './openai-auth'
|
||||
|
||||
/**
 * Client that talks to ChatGPT by automating the web app inside a
 * Puppeteer-controlled browser rather than by issuing raw HTTP requests.
 *
 * Typical usage: construct, `await init()`, call `sendMessage()` as often as
 * needed, then `await close()` to shut the browser down.
 */
export class ChatGPTAPIBrowser {
  protected _markdown: boolean
  protected _debug: boolean
  protected _isGoogleLogin: boolean
  protected _captchaToken: string

  protected _email: string
  protected _password: string

  protected _browser: Browser
  protected _page: Page

  /**
   * Creates a new client wrapper for automating the ChatGPT webapp.
   *
   * @param opts.email - Account email used to log in.
   * @param opts.password - Account password used to log in.
   * @param opts.markdown - Convert response HTML to markdown (`true`) or
   * return the plain text content (`false`).
   * @param opts.debug - Stored on the instance as `_debug`.
   * @param opts.isGoogleLogin - Forwarded to `getOpenAIAuth`.
   * @param opts.captchaToken - Forwarded to `getBrowser`.
   */
  constructor(opts: {
    email: string
    password: string

    /** @defaultValue `true` **/
    markdown?: boolean

    /** @defaultValue `false` **/
    debug?: boolean

    /** @defaultValue `false` **/
    isGoogleLogin?: boolean
    captchaToken?: string
  }) {
    const {
      email,
      password,
      markdown = true,
      debug = false,
      isGoogleLogin = false,
      captchaToken
    } = opts

    this._email = email
    this._password = password

    this._markdown = !!markdown
    this._debug = !!debug
    this._isGoogleLogin = !!isGoogleLogin
    this._captchaToken = captchaToken
  }

  /**
   * Launches a browser, logs in and navigates to the chat page.
   *
   * Any browser left over from a previous `init()` call is closed first.
   *
   * @returns `true` if the chat input box is present after login (i.e. the
   * session is authenticated), `false` otherwise.
   */
  async init(): Promise<boolean> {
    if (this._browser) {
      await this._browser.close()
      this._page = null
      this._browser = null
    }

    this._browser = await getBrowser({ captchaToken: this._captchaToken })
    this._page =
      (await this._browser.pages())[0] || (await this._browser.newPage())

    // bypass cloudflare and login
    await getOpenAIAuth({
      email: this._email,
      password: this._password,
      browser: this._browser,
      page: this._page,
      isGoogleLogin: this._isGoogleLogin
    })

    const chatUrl = 'https://chat.openai.com/chat'
    // ignore a trailing slash when comparing against the chat URL
    const url = this._page.url().replace(/\/$/, '')

    if (url !== chatUrl) {
      await this._page.goto(chatUrl, {
        waitUntil: 'networkidle0'
      })
    }

    // dismiss welcome modal: keep clicking its last button until the modal
    // disappears or the button can no longer be clicked
    const modalSelector = '[data-headlessui-state="open"]'
    do {
      if (!(await this._page.$(modalSelector))) {
        break
      }

      try {
        await this._page.click(`${modalSelector} button:last-child`)
      } catch (err) {
        // "next" button not found in welcome modal
        break
      }

      await delay(500)
    } while (true)

    // getIsAuthenticated() is async; without `await` the returned Promise is
    // always truthy and a failed login would never be reported.
    if (!(await this.getIsAuthenticated())) {
      return false
    }

    // this._page.on('response', this._onResponse.bind(this))
    return true
  }

  // _onResponse = (response: HTTPResponse) => {
  //   const request = response.request()

  //   console.log('response', {
  //     url: response.url(),
  //     ok: response.ok(),
  //     status: response.status(),
  //     statusText: response.statusText(),
  //     headers: response.headers(),
  //     request: {
  //       method: request.method(),
  //       headers: request.headers()
  //     }
  //   })
  // }

  /**
   * @returns `true` if the chat input box can be found on the current page,
   * `false` if it is missing or the lookup throws.
   */
  async getIsAuthenticated(): Promise<boolean> {
    try {
      const inputBox = await this._getInputBox()
      return !!inputBox
    } catch (err) {
      // can happen when navigating during login
      return false
    }
  }

  /**
   * @returns The most recent complete response message, or `null` when the
   * page contains none.
   */
  async getLastMessage(): Promise<string | null> {
    const messages = await this.getMessages()

    // An empty array is truthy, so check the length explicitly; otherwise
    // indexing `[-1]` would yield `undefined` instead of the declared `null`.
    if (messages && messages.length > 0) {
      return messages[messages.length - 1]
    }

    return null
  }

  /**
   * @returns The text of every prompt currently shown on the page.
   */
  async getPrompts(): Promise<string[]> {
    // Get all prompts
    const messages = await this._page.$$(
      '.text-base:has(.whitespace-pre-wrap):not(:has(button:nth-child(2))) .whitespace-pre-wrap'
    )

    // Prompts are always plaintext
    return Promise.all(messages.map((a) => a.evaluate((el) => el.textContent)))
  }

  /**
   * @returns Every complete response message on the page, converted to
   * markdown when the `markdown` option is enabled and returned as plain
   * text content otherwise.
   */
  async getMessages(): Promise<string[]> {
    // Get all complete messages
    // (in-progress messages that are being streamed back don't contain action buttons)
    const messages = await this._page.$$(
      '.text-base:has(.whitespace-pre-wrap):has(button:nth-child(2)) .whitespace-pre-wrap'
    )

    if (!this._markdown) {
      // plaintext
      return Promise.all(
        messages.map((a) => a.evaluate((el) => el.textContent))
      )
    }

    const htmlMessages = await Promise.all(
      messages.map((a) => a.evaluate((el) => el.innerHTML))
    )

    return htmlMessages.map((messageHtml) => {
      // parse markdown from message HTML; use a global pattern so the label
      // is stripped from every code block, not just the first one
      const cleanedHtml = messageHtml.replace(
        /Copy code<\/button>/g,
        '</button>'
      )

      return html2md(cleanedHtml, {
        ignoreTags: [
          'button',
          'svg',
          'style',
          'form',
          'noscript',
          'script',
          'meta',
          'head'
        ],
        skipTags: ['button', 'svg']
      })
    })
  }

  /**
   * Types `message` into the chat input, submits it and waits for a new
   * complete response to appear.
   *
   * NOTE: this polls once per second with no upper bound, so it does not
   * return until a response differing from the previous last one shows up.
   *
   * @param message - Prompt text to send.
   * @returns The new last response message.
   * @throws Error('not signed in') if the chat input box cannot be found.
   */
  async sendMessage(message: string): Promise<string> {
    const inputBox = await this._getInputBox()
    if (!inputBox) throw new Error('not signed in')

    // remember the current last response so a new one can be detected
    const lastMessage = await this.getLastMessage()

    await inputBox.click()
    await inputBox.type(message, { delay: 0 })
    await inputBox.press('Enter')

    do {
      await delay(1000)

      // TODO: this logic needs some work because we can have repeat messages...
      const newLastMessage = await this.getLastMessage()
      if (
        newLastMessage &&
        lastMessage?.toLowerCase() !== newLastMessage?.toLowerCase()
      ) {
        return newLastMessage
      }
    } while (true)
  }

  /**
   * Clicks the first link in the navigation bar to start a fresh thread.
   *
   * @throws Error('not signed in') if that link cannot be found.
   */
  async resetThread() {
    const resetButton = await this._page?.$('nav > a:nth-child(1)')
    if (!resetButton) throw new Error('not signed in')

    await resetButton.click()
  }

  /**
   * Closes the browser (if one is open) and clears the page/browser handles.
   * Safe to call before `init()` or more than once.
   */
  async close() {
    const browser = this._browser

    // reset state first so the instance is clean even if closing rejects
    this._page = null
    this._browser = null

    if (browser) {
      await browser.close()
    }
  }

  /**
   * Looks up the chat input box on the current page.
   *
   * @returns The `textarea` element handle, or `null` when there is no page
   * yet or no such element on it.
   */
  protected async _getInputBox() {
    if (!this._page) {
      return null
    }

    // [data-id="root"]
    return this._page.$('textarea')
  }
}
|
|
@ -95,6 +95,7 @@ export class ChatGPTAPI {
|
|||
'user-agent': this._userAgent,
|
||||
'x-openai-assistant-app-id': '',
|
||||
'accept-language': 'en-US,en;q=0.9',
|
||||
'accept-encoding': 'gzip, deflate, br',
|
||||
origin: 'https://chat.openai.com',
|
||||
referer: 'https://chat.openai.com/chat',
|
||||
'sec-ch-ua':
|
||||
|
@ -299,6 +300,45 @@ export class ChatGPTAPI {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Submits `input` to the backend `/moderations` endpoint using the
 * `text-moderation-playground` model.
 *
 * The access token is obtained via `refreshAccessToken()` and sent as a
 * Bearer token together with the Cloudflare clearance cookie.
 *
 * @param input - Text to send to the moderation endpoint.
 * @returns The parsed JSON body of the response.
 * @throws types.ChatGPTError carrying the response, status code and status
 * text when the HTTP response is not OK.
 */
async sendModeration(input: string) {
  const token = await this.refreshAccessToken()
  const endpoint = `${this._backendApiBaseUrl}/moderations`

  const payload: types.ModerationsJSONBody = {
    input,
    model: 'text-moderation-playground'
  }

  const requestHeaders = {
    ...this._headers,
    Authorization: `Bearer ${token}`,
    Accept: '*/*',
    'Content-Type': 'application/json',
    Cookie: `cf_clearance=${this._clearanceToken}`
  }

  if (this._debug) {
    console.log('POST', endpoint, requestHeaders, payload)
  }

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: requestHeaders,
    body: JSON.stringify(payload)
  })

  if (!response.ok) {
    // surface HTTP failures as a ChatGPTError that carries the raw response
    const failure = new types.ChatGPTError(
      `${response.status} ${response.statusText}`
    )
    failure.response = response
    failure.statusCode = response.status
    failure.statusText = response.statusText
    throw failure
  }

  return (await response.json()) as types.ModerationsJSONResult
}
|
||||
|
||||
/**
|
||||
* @returns `true` if the client has a valid access token or `false` if refreshing
|
||||
* the token fails.
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
export * from './chatgpt-api'
|
||||
export * from './chatgpt-api-browser'
|
||||
export * from './chatgpt-conversation'
|
||||
export * from './types'
|
||||
export * from './utils'
|
||||
|
|
|
@ -10,12 +10,15 @@ import {
|
|||
type PuppeteerLaunchOptions
|
||||
} from 'puppeteer'
|
||||
import puppeteer from 'puppeteer-extra'
|
||||
import RecaptchaPlugin from 'puppeteer-extra-plugin-recaptcha'
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth'
|
||||
|
||||
import * as types from './types'
|
||||
|
||||
puppeteer.use(StealthPlugin())
|
||||
|
||||
let hasRecaptchaPlugin = false
|
||||
|
||||
/**
|
||||
* Represents everything that's required to pass into `ChatGPTAPI` in order
|
||||
* to authenticate with the unofficial ChatGPT API.
|
||||
|
@ -46,47 +49,64 @@ export async function getOpenAIAuth({
|
|||
email,
|
||||
password,
|
||||
browser,
|
||||
page,
|
||||
timeoutMs = 2 * 60 * 1000,
|
||||
isGoogleLogin = false
|
||||
// TODO: temporary for testing...
|
||||
// timeoutMs = 60 * 60 * 1000,
|
||||
isGoogleLogin = false,
|
||||
captchaToken = process.env.CAPTCHA_TOKEN
|
||||
}: {
|
||||
email?: string
|
||||
password?: string
|
||||
browser?: Browser
|
||||
page?: Page
|
||||
timeoutMs?: number
|
||||
isGoogleLogin?: boolean
|
||||
captchaToken?: string
|
||||
}): Promise<OpenAIAuth> {
|
||||
let page: Page
|
||||
let origBrowser = browser
|
||||
const origBrowser = browser
|
||||
const origPage = page
|
||||
|
||||
try {
|
||||
if (!browser) {
|
||||
browser = await getBrowser()
|
||||
browser = await getBrowser({ captchaToken })
|
||||
}
|
||||
|
||||
const userAgent = await browser.userAgent()
|
||||
page = (await browser.pages())[0] || (await browser.newPage())
|
||||
page.setDefaultTimeout(timeoutMs)
|
||||
if (!page) {
|
||||
page = (await browser.pages())[0] || (await browser.newPage())
|
||||
page.setDefaultTimeout(timeoutMs)
|
||||
}
|
||||
|
||||
await page.goto('https://chat.openai.com/auth/login')
|
||||
await page.goto('https://chat.openai.com/auth/login', {
|
||||
waitUntil: 'networkidle0'
|
||||
})
|
||||
|
||||
// NOTE: this is where you may encounter a CAPTCHA
|
||||
if (hasRecaptchaPlugin) {
|
||||
await page.solveRecaptchas()
|
||||
}
|
||||
|
||||
await checkForChatGPTAtCapacity(page)
|
||||
|
||||
await page.waitForSelector('#__next .btn-primary', { timeout: timeoutMs })
|
||||
|
||||
// once we get to this point, the Cloudflare cookies are available
|
||||
await delay(1000)
|
||||
// once we get to this point, the Cloudflare cookies should be available
|
||||
|
||||
// login as well (optional)
|
||||
if (email && password) {
|
||||
await page.waitForSelector('#__next .btn-primary', { timeout: timeoutMs })
|
||||
await delay(500)
|
||||
|
||||
await Promise.all([
|
||||
// click login button
|
||||
page.click('#__next .btn-primary'),
|
||||
page.waitForNavigation({
|
||||
waitUntil: 'networkidle0'
|
||||
})
|
||||
])
|
||||
|
||||
let submitP: Promise<void>
|
||||
await checkForChatGPTAtCapacity(page)
|
||||
|
||||
let submitP: () => Promise<void>
|
||||
|
||||
if (isGoogleLogin) {
|
||||
await page.click('button[data-provider="google"]')
|
||||
|
@ -98,19 +118,25 @@ export async function getOpenAIAuth({
|
|||
])
|
||||
await page.waitForSelector('input[type="password"]', { visible: true })
|
||||
await page.type('input[type="password"]', password, { delay: 10 })
|
||||
submitP = page.keyboard.press('Enter')
|
||||
submitP = () => page.keyboard.press('Enter')
|
||||
} else {
|
||||
await page.waitForSelector('#username')
|
||||
await page.type('#username', email, { delay: 10 })
|
||||
await page.type('#username', email, { delay: 20 })
|
||||
await delay(100)
|
||||
|
||||
if (hasRecaptchaPlugin) {
|
||||
console.log('solveRecaptchas()')
|
||||
const res = await page.solveRecaptchas()
|
||||
console.log('solveRecaptchas result', res)
|
||||
}
|
||||
|
||||
await page.click('button[type="submit"]')
|
||||
await page.waitForSelector('#password')
|
||||
await page.type('#password', password, { delay: 10 })
|
||||
submitP = page.click('button[type="submit"]')
|
||||
submitP = () => page.click('button[type="submit"]')
|
||||
}
|
||||
|
||||
await Promise.all([
|
||||
submitP,
|
||||
|
||||
new Promise<void>((resolve, reject) => {
|
||||
let resolved = false
|
||||
|
||||
|
@ -151,7 +177,9 @@ export async function getOpenAIAuth({
|
|||
})
|
||||
|
||||
setTimeout(waitForCapacityText, 500)
|
||||
})
|
||||
}),
|
||||
|
||||
submitP()
|
||||
])
|
||||
}
|
||||
|
||||
|
@ -170,11 +198,10 @@ export async function getOpenAIAuth({
|
|||
|
||||
return authInfo
|
||||
} catch (err) {
|
||||
console.error(err)
|
||||
throw err
|
||||
} finally {
|
||||
if (origBrowser) {
|
||||
if (page) {
|
||||
if (page && page !== origPage) {
|
||||
await page.close()
|
||||
}
|
||||
} else if (browser) {
|
||||
|
@ -191,7 +218,28 @@ export async function getOpenAIAuth({
|
|||
* able to use the built-in `puppeteer` version of Chromium because Cloudflare
|
||||
* recognizes it and blocks access.
|
||||
*/
|
||||
export async function getBrowser(launchOptions?: PuppeteerLaunchOptions) {
|
||||
export async function getBrowser(
|
||||
opts: PuppeteerLaunchOptions & {
|
||||
captchaToken?: string
|
||||
} = {}
|
||||
) {
|
||||
const { captchaToken = process.env.CAPTCHA_TOKEN, ...launchOptions } = opts
|
||||
|
||||
if (captchaToken && !hasRecaptchaPlugin) {
|
||||
hasRecaptchaPlugin = true
|
||||
console.log('use captcha', captchaToken)
|
||||
|
||||
puppeteer.use(
|
||||
RecaptchaPlugin({
|
||||
provider: {
|
||||
id: '2captcha',
|
||||
token: captchaToken
|
||||
},
|
||||
visualFeedback: true // colorize reCAPTCHAs (violet = detected, green = solved)
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
return puppeteer.launch({
|
||||
headless: false,
|
||||
args: ['--no-sandbox', '--exclude-switches', 'enable-automation'],
|
||||
|
@ -212,16 +260,17 @@ export const defaultChromeExecutablePath = (): string => {
|
|||
case 'darwin':
|
||||
return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
|
||||
|
||||
default:
|
||||
default: {
|
||||
/**
|
||||
* Since two (2) separate chrome releases exists on linux
|
||||
* we first do a check to ensure we're executing the right one.
|
||||
* Since two (2) separate chrome releases exist on linux, we first do a
|
||||
* check to ensure we're executing the right one.
|
||||
*/
|
||||
const chromeExists = fs.existsSync('/usr/bin/google-chrome')
|
||||
|
||||
return chromeExists
|
||||
? '/usr/bin/google-chrome'
|
||||
: '/usr/bin/google-chrome-stable'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -231,6 +280,12 @@ async function checkForChatGPTAtCapacity(page: Page) {
|
|||
try {
|
||||
// res = await page.$('[role="alert"]')
|
||||
res = await page.$x("//div[contains(., 'ChatGPT is at capacity')]")
|
||||
console.log('capacity', res)
|
||||
|
||||
if (!res?.length) {
|
||||
res = await page.$x("//div[contains(., 'at capacity right now')]")
|
||||
console.log('capacity2', res)
|
||||
}
|
||||
} catch (err) {
|
||||
// ignore errors likely due to navigation
|
||||
}
|
||||
|
|
Ładowanie…
Reference in New Issue