import * as fs from 'node:fs' import * as os from 'node:os' import * as path from 'node:path' import * as url from 'node:url' import delay from 'delay' import { TimeoutError } from 'p-timeout' import { Browser, Page, Protocol, PuppeteerLaunchOptions } from 'puppeteer' import puppeteer from 'puppeteer-extra' import RecaptchaPlugin from 'puppeteer-extra-plugin-recaptcha' import StealthPlugin from 'puppeteer-extra-plugin-stealth' import random from 'random' import * as types from './types' import { minimizePage } from './utils' puppeteer.use(StealthPlugin()) let hasRecaptchaPlugin = false let hasNopechaExtension = false const __dirname = url.fileURLToPath(new URL('.', import.meta.url)) const DEFAULT_TIMEOUT_MS = 3 * 60 * 1000 // 3 minutes /** * Represents everything that's required to pass into `ChatGPTAPI` in order * to authenticate with the unofficial ChatGPT API. */ export type OpenAIAuth = { userAgent: string clearanceToken: string sessionToken: string cookies?: Record } /** * Bypasses OpenAI's use of Cloudflare to get the cookies required to use * ChatGPT. Uses Puppeteer with a stealth plugin under the hood. * * If you pass `email` and `password`, then it will log into the account and * include a `sessionToken` in the response. * * If you don't pass `email` and `password`, then it will just return a valid * `clearanceToken`. * * This can be useful because `clearanceToken` expires after ~2 hours, whereas * `sessionToken` generally lasts much longer. We recommend renewing your * `clearanceToken` every hour or so and creating a new instance of `ChatGPTAPI` * with your updated credentials. */ export async function getOpenAIAuth({ email, password, browser, page, timeoutMs = DEFAULT_TIMEOUT_MS, isGoogleLogin = false, isMicrosoftLogin = false, captchaToken = process.env.CAPTCHA_TOKEN, nopechaKey = process.env.NOPECHA_KEY, executablePath, proxyServer = process.env.PROXY_SERVER, minimize = false }: { email?: string password?: string browser?: Browser page?: Page timeoutMs?: number isGoogleLogin?: boolean isMicrosoftLogin?: boolean minimize?: boolean captchaToken?: string nopechaKey?: string executablePath?: string proxyServer?: string }): Promise { const origBrowser = browser const origPage = page try { if (!browser) { browser = await getBrowser({ captchaToken, nopechaKey, executablePath, proxyServer, timeoutMs }) } const userAgent = await browser.userAgent() if (!page) { page = (await browser.pages())[0] || (await browser.newPage()) page.setDefaultTimeout(timeoutMs) if (minimize) { await minimizePage(page) } } await page.goto('https://chat.openai.com/auth/login', { waitUntil: 'networkidle2' }) // NOTE: this is where you may encounter a CAPTCHA await checkForChatGPTAtCapacity(page, { timeoutMs }) if (hasRecaptchaPlugin) { const captchas = await page.findRecaptchas() if (captchas?.filtered?.length) { console.log('solving captchas using 2captcha...') const res = await page.solveRecaptchas() console.log('captcha result', res) } } // once we get to this point, the Cloudflare cookies should be available // login as well (optional) if (email && password) { await waitForConditionOrAtCapacity(page, () => page.waitForSelector('#__next .btn-primary', { timeout: timeoutMs }) ) await delay(500) // click login button and wait for navigation to finish do { await Promise.all([ page.waitForNavigation({ waitUntil: 'networkidle2', timeout: timeoutMs }), page.click('#__next .btn-primary') ]) await delay(500) } while (page.url().endsWith('/auth/login')) await checkForChatGPTAtCapacity(page, { timeoutMs }) let submitP: () => Promise if (isGoogleLogin) { await page.waitForSelector('button[data-provider="google"]', { timeout: timeoutMs }) await page.click('button[data-provider="google"]') await page.waitForSelector('input[type="email"]') await page.type('input[type="email"]', email, { delay: 10 }) await Promise.all([ page.waitForNavigation(), await page.keyboard.press('Enter') ]) await page.waitForSelector('input[type="password"]', { visible: true }) await page.type('input[type="password"]', password, { delay: 10 }) submitP = () => page.keyboard.press('Enter') } else if (isMicrosoftLogin) { await page.click('button[data-provider="windowslive"]') await page.waitForSelector('input[type="email"]') await page.type('input[type="email"]', email, { delay: 10 }) await Promise.all([ page.waitForNavigation(), await page.keyboard.press('Enter') ]) await delay(1500) await page.waitForSelector('input[type="password"]', { visible: true }) await page.type('input[type="password"]', password, { delay: 10 }) submitP = () => page.keyboard.press('Enter') await Promise.all([ page.waitForNavigation(), await page.keyboard.press('Enter') ]) await delay(1000) } else { await page.waitForSelector('#username') await page.type('#username', email) await delay(100) // NOTE: this is where you may encounter a CAPTCHA if (hasNopechaExtension) { await waitForRecaptcha(page, { timeoutMs }) } else if (hasRecaptchaPlugin) { console.log('solving captchas using 2captcha...') // Add retries in case network is unstable const retries = 3 for (let i = 0; i < retries; i++) { try { const res = await page.solveRecaptchas() if (res.captchas?.length) { console.log('captchas result', res) break } else { console.log('no captchas found') await delay(500) } } catch (e) { console.log('captcha error', e) } } } await delay(2000) const frame = page.mainFrame() const submit = await page.waitForSelector('button[type="submit"]', { timeout: timeoutMs }) await frame.focus('button[type="submit"]') await submit.focus() await submit.click() await page.waitForSelector('#password', { timeout: timeoutMs }) await page.type('#password', password, { delay: 10 }) submitP = () => page.click('button[type="submit"]') } await Promise.all([ waitForConditionOrAtCapacity(page, () => page.waitForNavigation({ waitUntil: 'networkidle2', timeout: timeoutMs }) ), submitP() ]) } else { await delay(2000) await checkForChatGPTAtCapacity(page, { timeoutMs }) } const pageCookies = await page.cookies() const cookies = pageCookies.reduce( (map, cookie) => ({ ...map, [cookie.name]: cookie }), {} ) const authInfo: OpenAIAuth = { userAgent, clearanceToken: cookies['cf_clearance']?.value, sessionToken: cookies['__Secure-next-auth.session-token']?.value, cookies } return authInfo } catch (err) { throw err } finally { if (origBrowser) { if (page && page !== origPage) { await page.close() } } else if (browser) { await browser.close() } page = null browser = null } } /** * Launches a non-puppeteer instance of Chrome. Note that in my testing, I wasn't * able to use the built-in `puppeteer` version of Chromium because Cloudflare * recognizes it and blocks access. */ export async function getBrowser( opts: PuppeteerLaunchOptions & { captchaToken?: string nopechaKey?: string proxyServer?: string minimize?: boolean timeoutMs?: number } = {} ) { const { captchaToken = process.env.CAPTCHA_TOKEN, nopechaKey = process.env.NOPECHA_KEY, executablePath = defaultChromeExecutablePath(), proxyServer = process.env.PROXY_SERVER, minimize = false, timeoutMs = DEFAULT_TIMEOUT_MS, ...launchOptions } = opts if (captchaToken && !hasRecaptchaPlugin) { hasRecaptchaPlugin = true // console.log('use captcha', captchaToken) puppeteer.use( RecaptchaPlugin({ provider: { id: '2captcha', token: captchaToken }, visualFeedback: true // colorize reCAPTCHAs (violet = detected, green = solved) }) ) } // https://peter.sh/experiments/chromium-command-line-switches/ const puppeteerArgs = [ '--no-sandbox', '--disable-setuid-sandbox', '--disable-infobars', '--disable-dev-shm-usage', '--disable-blink-features=AutomationControlled', '--ignore-certificate-errors', '--no-first-run', '--no-service-autorun', '--password-store=basic', '--system-developer-mode', // the following flags all try to reduce memory // '--single-process', '--mute-audio', '--disable-default-apps', '--no-zygote', '--disable-accelerated-2d-canvas', '--disable-web-security' // '--disable-gpu' // '--js-flags="--max-old-space-size=1024"' ] if (nopechaKey) { const nopechaPath = path.join( __dirname, '..', 'third-party', 'nopecha-chrome-extension' ) puppeteerArgs.push(`--disable-extensions-except=${nopechaPath}`) puppeteerArgs.push(`--load-extension=${nopechaPath}`) hasNopechaExtension = true } if (proxyServer) { const ipPort = proxyServer.includes('@') ? proxyServer.split('@')[1] : proxyServer puppeteerArgs.push(`--proxy-server=${ipPort}`) } const browser = await puppeteer.launch({ headless: false, // devtools: true, args: puppeteerArgs, ignoreDefaultArgs: [ '--disable-extensions', '--enable-automation', '--disable-component-extensions-with-background-pages' ], ignoreHTTPSErrors: true, executablePath, ...launchOptions }) if (process.env.PROXY_VALIDATE_IP) { const page = (await browser.pages())[0] || (await browser.newPage()) if (minimize) { await minimizePage(page) } // Send a fetch request to https://ifconfig.co using page.evaluate() and // verify that the IP matches let ip: string try { const res = await page.evaluate(() => { return fetch('https://ifconfig.co', { headers: { Accept: 'application/json' } }).then((res) => res.json()) }) ip = res?.ip } catch (err) { throw new Error(`Proxy IP validation failed: ${err.toString()}`) } if (!ip || ip !== process.env.PROXY_VALIDATE_IP) { throw new Error( `Proxy IP mismatch: ${ip} !== ${process.env.PROXY_VALIDATE_IP}` ) } } await initializeNopechaExtension(browser, { minimize, nopechaKey, timeoutMs }) return browser } export async function initializeNopechaExtension( browser: Browser, opts: { minimize?: boolean nopechaKey?: string timeoutMs?: number } ) { const { minimize = false, nopechaKey } = opts if (hasNopechaExtension) { const page = (await browser.pages())[0] || (await browser.newPage()) if (minimize) { await minimizePage(page) } console.log('initializing nopecha extension with key', nopechaKey, '...') // TODO: setting the nopecha extension key is really, really error prone... for (let i = 0; i < 5; ++i) { await page.goto(`https://nopecha.com/setup#${nopechaKey}`, { waitUntil: 'networkidle0' }) await delay(500) } } } /** * Gets the default path to chrome's executable for the current platform. */ export const defaultChromeExecutablePath = (): string => { // return executablePath() if (process.env.PUPPETEER_EXECUTABLE_PATH) { return process.env.PUPPETEER_EXECUTABLE_PATH } switch (os.platform()) { case 'win32': return 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe' case 'darwin': return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome' default: { /** * Since two (2) separate chrome releases exist on linux, we first do a * check to ensure we're executing the right one. */ const chromeExists = fs.existsSync('/usr/bin/google-chrome') return chromeExists ? '/usr/bin/google-chrome' : '/usr/bin/google-chrome-stable' } } } async function checkForChatGPTAtCapacity( page: Page, opts: { timeoutMs?: number pollingIntervalMs?: number retries?: number } = {} ) { const { timeoutMs = 2 * 60 * 1000, // 2 minutes pollingIntervalMs = 3000, retries = 10 } = opts // console.log('checkForChatGPTAtCapacity', page.url()) let isAtCapacity = false let numTries = 0 do { try { await solveSimpleCaptchas(page) const res = await page.$x("//div[contains(., 'ChatGPT is at capacity')]") isAtCapacity = !!res?.length if (isAtCapacity) { if (++numTries >= retries) { break } // try refreshing the page if chatgpt is at capacity await page.reload({ waitUntil: 'networkidle2', timeout: timeoutMs }) await delay(pollingIntervalMs) } } catch (err) { // ignore errors likely due to navigation ++numTries break } } while (isAtCapacity) if (isAtCapacity) { const error = new types.ChatGPTError('ChatGPT is at capacity') error.statusCode = 503 throw error } } async function waitForConditionOrAtCapacity( page: Page, condition: () => Promise, opts: { pollingIntervalMs?: number } = {} ) { const { pollingIntervalMs = 500 } = opts return new Promise((resolve, reject) => { let resolved = false async function waitForCapacityText() { if (resolved) { return } try { await checkForChatGPTAtCapacity(page) if (!resolved) { setTimeout(waitForCapacityText, pollingIntervalMs) } } catch (err) { if (!resolved) { resolved = true return reject(err) } } } condition() .then(() => { if (!resolved) { resolved = true resolve() } }) .catch((err) => { if (!resolved) { resolved = true reject(err) } }) setTimeout(waitForCapacityText, pollingIntervalMs) }) } async function solveSimpleCaptchas(page: Page) { try { const verifyYouAreHuman = await page.$('text=Verify you are human') if (verifyYouAreHuman) { await delay(2000) await verifyYouAreHuman.click({ delay: random.int(5, 25) }) await delay(1000) } const cloudflareButton = await page.$('.hcaptcha-box') if (cloudflareButton) { await delay(2000) await cloudflareButton.click({ delay: random.int(5, 25) }) await delay(1000) } } catch (err) { // ignore errors } } async function waitForRecaptcha( page: Page, opts: { pollingIntervalMs?: number timeoutMs?: number } = {} ) { await solveSimpleCaptchas(page) if (!hasNopechaExtension) { return } const { pollingIntervalMs = 100, timeoutMs } = opts const captcha = await page.$('textarea#g-recaptcha-response') const startTime = Date.now() if (captcha) { console.log('waiting to solve recaptcha...') do { try { const captcha = await page.$('textarea#g-recaptcha-response') if (!captcha) { // the user may have gone past the page manually console.log('captcha no longer found; continuing') break } const value = (await captcha.evaluate((el) => el.value))?.trim() if (value?.length) { // recaptcha has been solved! console.log('captcha solved; continuing') break } } catch (err) { // catch navigation-related page context errors } if (timeoutMs) { const now = Date.now() if (now - startTime >= timeoutMs) { throw new TimeoutError('Timed out waiting to solve Recaptcha') } } await delay(pollingIntervalMs) } while (true) } }