diff --git a/legacy/demos/demo.ts b/legacy/demos/demo.ts index 7f432413..0c18173e 100644 --- a/legacy/demos/demo.ts +++ b/legacy/demos/demo.ts @@ -1,7 +1,7 @@ import dotenv from 'dotenv-safe' import { oraPromise } from 'ora' -import { ChatGPTAPI, getOpenAIAuth } from '../src' +import { ChatGPTAPIBrowser } from '../src' dotenv.config() @@ -16,13 +16,9 @@ async function main() { const email = process.env.OPENAI_EMAIL const password = process.env.OPENAI_PASSWORD - const authInfo = await getOpenAIAuth({ - email, - password - }) - - const api = new ChatGPTAPI({ ...authInfo }) - await api.ensureAuth() + const api = new ChatGPTAPIBrowser({ email, password }) + const res = await api.init() + console.log('init result', res) const prompt = 'Write a python version of bubble sort. Do not include example usage.' @@ -31,6 +27,7 @@ async function main() { text: prompt }) + await api.close() return response } diff --git a/legacy/package.json b/legacy/package.json index 9de81cb3..5590c189 100644 --- a/legacy/package.json +++ b/legacy/package.json @@ -38,8 +38,10 @@ "delay": "^5.0.0", "eventsource-parser": "^0.0.5", "expiry-map": "^2.0.0", + "html-to-md": "^0.8.3", "p-timeout": "^6.0.0", "puppeteer-extra": "^3.3.4", + "puppeteer-extra-plugin-recaptcha": "^3.6.6", "puppeteer-extra-plugin-stealth": "^2.11.1", "remark": "^14.0.2", "strip-markdown": "^5.0.0", diff --git a/legacy/pnpm-lock.yaml b/legacy/pnpm-lock.yaml index 38dbc653..ae8fa641 100644 --- a/legacy/pnpm-lock.yaml +++ b/legacy/pnpm-lock.yaml @@ -10,6 +10,7 @@ specifiers: dotenv-safe: ^8.2.0 eventsource-parser: ^0.0.5 expiry-map: ^2.0.0 + html-to-md: ^0.8.3 husky: ^8.0.2 lint-staged: ^13.0.3 npm-run-all: ^4.1.5 @@ -18,6 +19,7 @@ specifiers: prettier: ^2.8.0 puppeteer: ^19.4.0 puppeteer-extra: ^3.3.4 + puppeteer-extra-plugin-recaptcha: ^3.6.6 puppeteer-extra-plugin-stealth: ^2.11.1 remark: ^14.0.2 strip-markdown: ^5.0.0 @@ -32,8 +34,10 @@ dependencies: delay: 5.0.0 eventsource-parser: 0.0.5 expiry-map: 2.0.0 + html-to-md: 
0.8.3 p-timeout: 6.0.0 puppeteer-extra: 3.3.4_puppeteer@19.4.0 + puppeteer-extra-plugin-recaptcha: 3.6.6_puppeteer-extra@3.3.4 puppeteer-extra-plugin-stealth: 2.11.1_puppeteer-extra@3.3.4 remark: 14.0.2 strip-markdown: 5.0.0 @@ -1789,6 +1793,10 @@ packages: lru-cache: 6.0.0 dev: true + /html-to-md/0.8.3: + resolution: {integrity: sha512-Va+bB1YOdD6vMRDue9/l7YxbERgwOgsos4erUDRfRN6YE0B2Wbbw8uAj6xZJk9A9vrjVy7mG/WLlhDw6RXfgsA==} + dev: false + /https-proxy-agent/5.0.1: resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} engines: {node: '>= 6'} @@ -3065,6 +3073,26 @@ packages: - supports-color - utf-8-validate + /puppeteer-extra-plugin-recaptcha/3.6.6_puppeteer-extra@3.3.4: + resolution: {integrity: sha512-SVbmL+igGX8m0Qg9dn85trWDghbfUCTG/QUHYscYx5XgMZVVb0/v0a6MqbPdHoKmBx5BS2kLd6rorMlncMcXdw==} + engines: {node: '>=9.11.2'} + peerDependencies: + playwright-extra: '*' + puppeteer-extra: '*' + peerDependenciesMeta: + playwright-extra: + optional: true + puppeteer-extra: + optional: true + dependencies: + debug: 4.3.4 + merge-deep: 3.0.3 + puppeteer-extra: 3.3.4_puppeteer@19.4.0 + puppeteer-extra-plugin: 3.2.2_puppeteer-extra@3.3.4 + transitivePeerDependencies: + - supports-color + dev: false + /puppeteer-extra-plugin-stealth/2.11.1_puppeteer-extra@3.3.4: resolution: {integrity: sha512-n0wdC0Ilc9tk5L6FWLyd0P2gT8b2fp+2NuB+KB0oTSw3wXaZ0D6WNakjJsayJ4waGzIJFCUHkmK9zgx5NKMoFw==} engines: {node: '>=8'} diff --git a/legacy/src/chatgpt-api-browser.ts b/legacy/src/chatgpt-api-browser.ts new file mode 100644 index 00000000..377bf7ac --- /dev/null +++ b/legacy/src/chatgpt-api-browser.ts @@ -0,0 +1,236 @@ +import delay from 'delay' +import html2md from 'html-to-md' +import { type Browser, type HTTPResponse, type Page } from 'puppeteer' + +import * as types from './types' +import { getBrowser, getOpenAIAuth } from './openai-auth' + +export class ChatGPTAPIBrowser { + protected _markdown: boolean + protected _debug: 
boolean + protected _isGoogleLogin: boolean + protected _captchaToken: string + + protected _email: string + protected _password: string + + protected _browser: Browser + protected _page: Page + + /** + * Creates a new client wrapper for automating the ChatGPT webapp. + */ + constructor(opts: { + email: string + password: string + + /** @defaultValue `true` **/ + markdown?: boolean + + /** @defaultValue `false` **/ + debug?: boolean + + isGoogleLogin?: boolean + captchaToken?: string + }) { + const { + email, + password, + markdown = true, + debug = false, + isGoogleLogin = false, + captchaToken + } = opts + + this._email = email + this._password = password + + this._markdown = !!markdown + this._debug = !!debug + this._isGoogleLogin = !!isGoogleLogin + this._captchaToken = captchaToken + } + + async init() { + if (this._browser) { + await this._browser.close() + this._page = null + this._browser = null + } + + this._browser = await getBrowser({ captchaToken: this._captchaToken }) + this._page = + (await this._browser.pages())[0] || (await this._browser.newPage()) + + // bypass cloudflare and login + await getOpenAIAuth({ + email: this._email, + password: this._password, + browser: this._browser, + page: this._page, + isGoogleLogin: this._isGoogleLogin + }) + + const chatUrl = 'https://chat.openai.com/chat' + const url = this._page.url().replace(/\/$/, '') + + if (url !== chatUrl) { + await this._page.goto(chatUrl, { + waitUntil: 'networkidle0' + }) + } + + // dismiss welcome modal + do { + const modalSelector = '[data-headlessui-state="open"]' + + if (!(await this._page.$(modalSelector))) { + break + } + + try { + await this._page.click(`${modalSelector} button:last-child`) + } catch (err) { + // "next" button not found in welcome modal + break + } + + await delay(500) + } while (true) + + if (!(await this.getIsAuthenticated())) { + return false + } + + // this._page.on('response', this._onResponse.bind(this)) + return true + } + + // _onResponse = (response: 
HTTPResponse) => { + // const request = response.request() + + // console.log('response', { + // url: response.url(), + // ok: response.ok(), + // status: response.status(), + // statusText: response.statusText(), + // headers: response.headers(), + // request: { + // method: request.method(), + // headers: request.headers() + // } + // }) + // } + + async getIsAuthenticated() { + try { + const inputBox = await this._getInputBox() + return !!inputBox + } catch (err) { + // can happen when navigating during login + return false + } + } + + async getLastMessage(): Promise<string | null> { + const messages = await this.getMessages() + + if (messages?.length) { + return messages[messages.length - 1] + } else { + return null + } + } + + async getPrompts(): Promise<string[]> { + // Get all prompts + const messages = await this._page.$$( + '.text-base:has(.whitespace-pre-wrap):not(:has(button:nth-child(2))) .whitespace-pre-wrap' + ) + + // Prompts are always plaintext + return Promise.all(messages.map((a) => a.evaluate((el) => el.textContent))) + } + + async getMessages(): Promise<string[]> { + // Get all complete messages + // (in-progress messages that are being streamed back don't contain action buttons) + const messages = await this._page.$$( + '.text-base:has(.whitespace-pre-wrap):has(button:nth-child(2)) .whitespace-pre-wrap' + ) + + if (this._markdown) { + const htmlMessages = await Promise.all( + messages.map((a) => a.evaluate((el) => el.innerHTML)) + ) + + const markdownMessages = htmlMessages.map((messageHtml) => { + // parse markdown from message HTML + messageHtml = messageHtml.replace('Copy code', '') + return html2md(messageHtml, { + ignoreTags: [ + 'button', + 'svg', + 'style', + 'form', + 'noscript', + 'script', + 'meta', + 'head' + ], + skipTags: ['button', 'svg'] + }) + }) + + return markdownMessages + } else { + // plaintext + const plaintextMessages = await Promise.all( + messages.map((a) => a.evaluate((el) => el.textContent)) + ) + return plaintextMessages + } + } + + async 
sendMessage(message: string): Promise<string> { + const inputBox = await this._getInputBox() + if (!inputBox) throw new Error('not signed in') + + const lastMessage = await this.getLastMessage() + + await inputBox.click() + await inputBox.type(message, { delay: 0 }) + await inputBox.press('Enter') + + do { + await delay(1000) + + // TODO: this logic needs some work because we can have repeat messages... + const newLastMessage = await this.getLastMessage() + if ( + newLastMessage && + lastMessage?.toLowerCase() !== newLastMessage?.toLowerCase() + ) { + return newLastMessage + } + } while (true) + } + + async resetThread() { + const resetButton = await this._page.$('nav > a:nth-child(1)') + if (!resetButton) throw new Error('not signed in') + + await resetButton.click() + } + + async close() { + await this._browser?.close() + this._page = null + this._browser = null + } + + protected async _getInputBox() { + // [data-id="root"] + return this._page.$('textarea') + } +} diff --git a/legacy/src/chatgpt-api.ts b/legacy/src/chatgpt-api.ts index fea5af68..ab4f71dc 100644 --- a/legacy/src/chatgpt-api.ts +++ b/legacy/src/chatgpt-api.ts @@ -95,6 +95,7 @@ export class ChatGPTAPI { 'user-agent': this._userAgent, 'x-openai-assistant-app-id': '', 'accept-language': 'en-US,en;q=0.9', + 'accept-encoding': 'gzip, deflate, br', origin: 'https://chat.openai.com', referer: 'https://chat.openai.com/chat', 'sec-ch-ua': @@ -299,6 +300,45 @@ export class ChatGPTAPI { } } + async sendModeration(input: string) { + const accessToken = await this.refreshAccessToken() + const url = `${this._backendApiBaseUrl}/moderations` + const headers = { + ...this._headers, + Authorization: `Bearer ${accessToken}`, + Accept: '*/*', + 'Content-Type': 'application/json', + Cookie: `cf_clearance=${this._clearanceToken}` + } + + const body: types.ModerationsJSONBody = { + input, + model: 'text-moderation-playground' + } + + if (this._debug) { + console.log('POST', url, headers, body) + } + + const res = await fetch(url, 
{ + method: 'POST', + headers, + body: JSON.stringify(body) + }).then((r) => { + if (!r.ok) { + const error = new types.ChatGPTError(`${r.status} ${r.statusText}`) + error.response = r + error.statusCode = r.status + error.statusText = r.statusText + throw error + } + + return r.json() as any as types.ModerationsJSONResult + }) + + return res + } + /** * @returns `true` if the client has a valid acces token or `false` if refreshing * the token fails. diff --git a/legacy/src/index.ts b/legacy/src/index.ts index 976d160a..ab2ab044 100644 --- a/legacy/src/index.ts +++ b/legacy/src/index.ts @@ -1,4 +1,5 @@ export * from './chatgpt-api' +export * from './chatgpt-api-browser' export * from './chatgpt-conversation' export * from './types' export * from './utils' diff --git a/legacy/src/openai-auth.ts b/legacy/src/openai-auth.ts index 121fd424..7ece9b05 100644 --- a/legacy/src/openai-auth.ts +++ b/legacy/src/openai-auth.ts @@ -10,12 +10,15 @@ import { type PuppeteerLaunchOptions } from 'puppeteer' import puppeteer from 'puppeteer-extra' +import RecaptchaPlugin from 'puppeteer-extra-plugin-recaptcha' import StealthPlugin from 'puppeteer-extra-plugin-stealth' import * as types from './types' puppeteer.use(StealthPlugin()) +let hasRecaptchaPlugin = false + /** * Represents everything that's required to pass into `ChatGPTAPI` in order * to authenticate with the unofficial ChatGPT API. @@ -46,47 +49,64 @@ export async function getOpenAIAuth({ email, password, browser, + page, timeoutMs = 2 * 60 * 1000, - isGoogleLogin = false + // TODO: temporary for testing... 
+ // timeoutMs = 60 * 60 * 1000, + isGoogleLogin = false, + captchaToken = process.env.CAPTCHA_TOKEN }: { email?: string password?: string browser?: Browser + page?: Page timeoutMs?: number isGoogleLogin?: boolean + captchaToken?: string }): Promise { - let page: Page - let origBrowser = browser + const origBrowser = browser + const origPage = page try { if (!browser) { - browser = await getBrowser() + browser = await getBrowser({ captchaToken }) } const userAgent = await browser.userAgent() - page = (await browser.pages())[0] || (await browser.newPage()) - page.setDefaultTimeout(timeoutMs) + if (!page) { + page = (await browser.pages())[0] || (await browser.newPage()) + page.setDefaultTimeout(timeoutMs) + } - await page.goto('https://chat.openai.com/auth/login') + await page.goto('https://chat.openai.com/auth/login', { + waitUntil: 'networkidle0' + }) // NOTE: this is where you may encounter a CAPTCHA + if (hasRecaptchaPlugin) { + await page.solveRecaptchas() + } + await checkForChatGPTAtCapacity(page) - await page.waitForSelector('#__next .btn-primary', { timeout: timeoutMs }) - - // once we get to this point, the Cloudflare cookies are available - await delay(1000) + // once we get to this point, the Cloudflare cookies should be available // login as well (optional) if (email && password) { + await page.waitForSelector('#__next .btn-primary', { timeout: timeoutMs }) + await delay(500) + await Promise.all([ + // click login button page.click('#__next .btn-primary'), page.waitForNavigation({ waitUntil: 'networkidle0' }) ]) - let submitP: Promise + await checkForChatGPTAtCapacity(page) + + let submitP: () => Promise if (isGoogleLogin) { await page.click('button[data-provider="google"]') @@ -98,19 +118,25 @@ export async function getOpenAIAuth({ ]) await page.waitForSelector('input[type="password"]', { visible: true }) await page.type('input[type="password"]', password, { delay: 10 }) - submitP = page.keyboard.press('Enter') + submitP = () => 
page.keyboard.press('Enter') } else { await page.waitForSelector('#username') - await page.type('#username', email, { delay: 10 }) + await page.type('#username', email, { delay: 20 }) + await delay(100) + + if (hasRecaptchaPlugin) { + console.log('solveRecaptchas()') + const res = await page.solveRecaptchas() + console.log('solveRecaptchas result', res) + } + await page.click('button[type="submit"]') await page.waitForSelector('#password') await page.type('#password', password, { delay: 10 }) - submitP = page.click('button[type="submit"]') + submitP = () => page.click('button[type="submit"]') } await Promise.all([ - submitP, - new Promise((resolve, reject) => { let resolved = false @@ -151,7 +177,9 @@ export async function getOpenAIAuth({ }) setTimeout(waitForCapacityText, 500) - }) + }), + + submitP() ]) } @@ -170,11 +198,10 @@ export async function getOpenAIAuth({ return authInfo } catch (err) { - console.error(err) throw err } finally { if (origBrowser) { - if (page) { + if (page && page !== origPage) { await page.close() } } else if (browser) { @@ -191,7 +218,28 @@ export async function getOpenAIAuth({ * able to use the built-in `puppeteer` version of Chromium because Cloudflare * recognizes it and blocks access. 
*/ -export async function getBrowser(launchOptions?: PuppeteerLaunchOptions) { +export async function getBrowser( + opts: PuppeteerLaunchOptions & { + captchaToken?: string + } = {} +) { + const { captchaToken = process.env.CAPTCHA_TOKEN, ...launchOptions } = opts + + if (captchaToken && !hasRecaptchaPlugin) { + hasRecaptchaPlugin = true + console.log('use captcha', captchaToken) + + puppeteer.use( + RecaptchaPlugin({ + provider: { + id: '2captcha', + token: captchaToken + }, + visualFeedback: true // colorize reCAPTCHAs (violet = detected, green = solved) + }) + ) + } + return puppeteer.launch({ headless: false, args: ['--no-sandbox', '--exclude-switches', 'enable-automation'], @@ -212,16 +260,17 @@ export const defaultChromeExecutablePath = (): string => { case 'darwin': return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome' - default: + default: { /** - * Since two (2) separate chrome releases exists on linux - * we first do a check to ensure we're executing the right one. + * Since two (2) separate chrome releases exist on linux, we first do a + * check to ensure we're executing the right one. */ const chromeExists = fs.existsSync('/usr/bin/google-chrome') return chromeExists ? '/usr/bin/google-chrome' : '/usr/bin/google-chrome-stable' + } } } @@ -231,6 +280,12 @@ async function checkForChatGPTAtCapacity(page: Page) { try { // res = await page.$('[role="alert"]') res = await page.$x("//div[contains(., 'ChatGPT is at capacity')]") + console.log('capacity', res) + + if (!res?.length) { + res = await page.$x("//div[contains(., 'at capacity right now')]") + console.log('capacity2', res) + } } catch (err) { // ignore errors likely due to navigation }