feat: move puppeteer automation into main lib; update docs

2022-12-12 11:23:03 -06:00 · 2022-12-12 11:23:03 -06:00 · 1d621d0c3c
commit 1d621d0c3c
--- a/demos/demo-conversation.ts
+++ b/demos/demo-conversation.ts
@ -1,8 +1,7 @@
 import dotenv from 'dotenv-safe'
 import { oraPromise } from 'ora'

-import { ChatGPTAPI } from '../src'
-import { getOpenAIAuthInfo } from './openai-auth-puppeteer'
+import { ChatGPTAPI, getOpenAIAuth } from '../src'

 dotenv.config()

@ -17,7 +16,7 @@ async function main() {
  const email = process.env.EMAIL
  const password = process.env.PASSWORD

-  const authInfo = await getOpenAIAuthInfo({
+  const authInfo = await getOpenAIAuth({
    email,
    password
  })
--- a/demos/demo.ts
+++ b/demos/demo.ts
@ -1,8 +1,7 @@
 import dotenv from 'dotenv-safe'
 import { oraPromise } from 'ora'

-import { ChatGPTAPI } from '../src'
-import { getOpenAIAuthInfo } from './openai-auth-puppeteer'
+import { ChatGPTAPI, getOpenAIAuth } from '../src'

 dotenv.config()

@ -17,7 +16,7 @@ async function main() {
  const email = process.env.EMAIL
  const password = process.env.PASSWORD

-  const authInfo = await getOpenAIAuthInfo({
+  const authInfo = await getOpenAIAuth({
    email,
    password
  })
--- a/package.json
+++ b/package.json
@ -20,14 +20,13 @@
    "build"
  ],
  "engines": {
-    "node": ">=16.8"
+    "node": ">=18"
  },
  "scripts": {
    "build": "tsup",
    "dev": "tsup --watch",
    "clean": "del build",
    "prebuild": "run-s clean",
-    "postbuild": "[ -n CI ] && sed -i '' 's/await import(\"undici\")/null/' build/browser/index.js || echo 'skipping postbuild on CI'",
    "predev": "run-s clean",
    "pretest": "run-s build",
    "docs": "typedoc",
@ -42,7 +41,10 @@
    "p-timeout": "^6.0.0",
    "remark": "^14.0.2",
    "strip-markdown": "^5.0.0",
-    "uuid": "^9.0.0"
+    "delay": "^5.0.0",
+    "uuid": "^9.0.0",
+    "puppeteer-extra": "^3.3.4",
+    "puppeteer-extra-plugin-stealth": "^2.11.1"
  },
  "devDependencies": {
    "@trivago/prettier-plugin-sort-imports": "^4.0.0",
@ -50,7 +52,6 @@
    "@types/uuid": "^9.0.0",
    "ava": "^5.1.0",
    "del-cli": "^5.0.0",
-    "delay": "^5.0.0",
    "dotenv-safe": "^8.2.0",
    "husky": "^8.0.2",
    "lint-staged": "^13.0.3",
@ -58,16 +59,14 @@
    "ora": "^6.1.2",
    "prettier": "^2.8.0",
    "puppeteer": "^19.4.0",
-    "puppeteer-extra": "^3.3.4",
-    "puppeteer-extra-plugin-stealth": "^2.11.1",
    "tsup": "^6.5.0",
    "tsx": "^3.12.1",
    "typedoc": "^0.23.21",
    "typedoc-plugin-markdown": "^3.13.6",
    "typescript": "^4.9.3"
  },
-  "optionalDependencies": {
-    "undici": "^5.13.0"
+  "peerDependencies": {
+    "puppeteer": "*"
  },
  "lint-staged": {
    "*.{ts,tsx}": [
--- a/readme.md
+++ b/readme.md
@ -4,28 +4,9 @@ Yesterday, OpenAI added additional Cloudflare protections that make it more diff

 The demos have been updated to use Puppeteer to log in to ChatGPT and extract the Cloudflare `cf_clearance` cookie and OpenAI session token. 🔥

-To use the updated version, first make sure you're using the latest version of this package and Node.js >= 18:
+To use the updated version, make sure you're using the latest version of this package and Node.js >= 18. Then update your code to use the examples below, paying special attention to the sections on [Authentication](#authentication) and [Restrictions](#restrictions).

-```ts
-const api = new ChatGPTAPI({
-  sessionToken: process.env.SESSION_TOKEN,
-  clearanceToken: process.env.CLEARANCE_TOKEN,
-  userAgent: '' // needs to match your browser's user agent
-})
-
-await api.ensureAuth()
-```
-
-Restrictions on this method:
-
- Cloudflare `cf_clearance` **tokens expire after 2 hours**, so right now we recommend that you refresh your `cf_clearance` token every ~45 minutes or so.
- Your `user-agent` and `IP address` **must match** from the real browser window you're logged in with to the one you're using for `ChatGPTAPI`.
-  - This means that you currently can't log in with your laptop and then run the bot on a server or proxy somewhere.
- Cloudflare will still sometimes ask you to complete a CAPTCHA, so you may need to keep an eye on it and manually resolve the CAPTCHA. Automated CAPTCHA bypass is a WIP.
- You must use `node >= 18`. I'm using `v19.2.0` in my testing, but for some reason, all `fetch` requests using Node.js `v16` and `v17` fail at the moment (these use `undici` under the hood, whereas Node.js v18 and above use a built-in `fetch` based on `undici`).
- You should not be using this account while the bot is using it, because that browser window may refresh one of your tokens and invalidate the bot's session.
-
-We're working hard in [this issue](https://github.com/transitive-bullshit/chatgpt-api/issues/96) to make this process easier and more automated.
+We're working hard in [this issue](https://github.com/transitive-bullshit/chatgpt-api/issues/96) to improve this process. Keep in mind that this package will be updated to use the official API as soon as it's released. 💪

 Cheers,
 Travis
@ -48,7 +29,8 @@ Travis
  - [Usage](#usage)
    - [Docs](#docs)
    - [Demos](#demos)
-    - [Session Tokens](#session-tokens)
+    - [Authentication](#authentication)
+      - [Restrictions](#restrictions)
  - [Projects](#projects)
  - [Compatibility](#compatibility)
  - [Credits](#credits)
@ -69,15 +51,17 @@ npm install chatgpt
 ## Usage

 ```ts
-import { ChatGPTAPI } from 'chatgpt'
+import { ChatGPTAPI, getOpenAIAuth } from 'chatgpt'

 async function example() {
-  const api = new ChatGPTAPI({
-    sessionToken: process.env.SESSION_TOKEN,
-    clearanceToken: process.env.CLEARANCE_TOKEN,
-    userAgent: 'TODO'
+  // uses puppeteer to bypass cloudflare (headful because you may have to solve
+  // a captcha)
+  const openAIAuth = await getOpenAIAuth({
+    email: process.env.EMAIL,
+    password: process.env.PASSWORD
  })

+  const api = new ChatGPTAPI({ ...openAIAuth })
  await api.ensureAuth()

  // send a message and wait for the response
@ -93,32 +77,23 @@ async function example() {
 ChatGPT responses are formatted as markdown by default. If you want to work with plaintext instead, you can use:

 ```ts
-const api = new ChatGPTAPI({
-  sessionToken: process.env.SESSION_TOKEN,
-  clearanceToken: process.env.CLEARANCE_TOKEN,
-  userAgent: 'TODO',
-  markdown: false
-})
+const api = new ChatGPTAPI({ ...openAIAuth, markdown: false })
 ```

 If you want to automatically track the conversation, you can use `ChatGPTAPI.getConversation()`:

 ```ts
-const api = new ChatGPTAPI({
-  sessionToken: process.env.SESSION_TOKEN,
-  clearanceToken: process.env.CLEARANCE_TOKEN,
-  userAgent: 'TODO'
-})
+const api = new ChatGPTAPI({ ...openAIAuth })

 const conversation = api.getConversation()

 // send a message and wait for the response
 const response0 = await conversation.sendMessage('What is OpenAI?')

-// send a follow-up prompt to the previous message and wait for the response
+// send a follow-up
 const response1 = await conversation.sendMessage('Can you expand on that?')

-// send another follow-up to the same conversation
+// send another follow-up
 const response2 = await conversation.sendMessage('Oh cool; thank you')
 ```

@ -141,13 +116,14 @@ You can stream responses using the `onProgress` or `onConversationResponse` call
 ```js
 async function example() {
  // To use ESM in CommonJS, you can use a dynamic import
-  const { ChatGPTAPI } = await import('chatgpt')
+  const { ChatGPTAPI, getOpenAIAuth } = await import('chatgpt')

-  const api = new ChatGPTAPI({
-    sessionToken: process.env.SESSION_TOKEN,
-    clearanceToken: process.env.CLEARANCE_TOKEN,
-    userAgent: 'TODO'
+  const openAIAuth = await getOpenAIAuth({
+    email: process.env.EMAIL,
+    password: process.env.PASSWORD
  })
+
+  const api = new ChatGPTAPI({ ...openAIAuth })
  await api.ensureAuth()

  const response = await api.sendMessage('Hello World!')
@ -181,13 +157,21 @@ A [conversation demo](./demos/demo-conversation.ts) is also included:
 npx tsx src/demo-conversation.ts
 ```

-### Session Tokens
+### Authentication

-**This package requires a valid session token from ChatGPT to access it's unofficial REST API.**
+#### Restrictions

-As of December 11, 2021, it also requires a valid Cloudflare clearance token.
+**Please read carefully**

-There are two options to get these; either manually, or automated. For the automated way, see the `demos/` folder using Puppeteer.
+- You must use `node >= 18`. I'm using `v19.2.0` in my testing, but for some reason, all `fetch` requests using Node.js `v16` and `v17` fail at the moment (these use `undici` under the hood, whereas Node.js v18 and above use a built-in `fetch` based on `undici`).
+- Cloudflare `cf_clearance` **tokens expire after 2 hours**, so right now we recommend that you refresh your `cf_clearance` token every hour or so.
+- Your `user-agent` and `IP address` **must match** from the real browser window you're logged in with to the one you're using for `ChatGPTAPI`.
+  - This means that you currently can't log in with your laptop and then run the bot on a server or proxy somewhere.
+- Cloudflare will still sometimes ask you to complete a CAPTCHA, so you may need to keep an eye on it and manually resolve the CAPTCHA. Automated CAPTCHA bypass is coming soon.
+- You should not be using this account while the bot is using it, because that browser window may refresh one of your tokens and invalidate the bot's session.
+
+<details>
+<summary>Getting tokens manually</summary>

 To get a session token manually:

@ -195,8 +179,10 @@ To get a session token manually:
 2. Open dev tools.
 3. Open `Application` > `Cookies`.
   ![ChatGPT cookies](./media/session-token.png)
-4. Copy the value for `__Secure-next-auth.session-token` and save it to your environment.
-5. Copy the value for `cf_clearance` and save it to your environment.
+4. Copy the value for `__Secure-next-auth.session-token` and save it to your environment. This will be your `sessionToken`.
+5. Copy the value for `cf_clearance` and save it to your environment. This will be your `clearanceToken`.
+
+</details>

 > **Note**
 > This package will switch to using the official API once it's released.
@ -255,11 +241,8 @@ If you create a cool integration, feel free to open a PR and add it to the list.

 This package is ESM-only. It supports:

- Node.js >= 16.8
-  - If you need Node.js 14 support, use [`v1.4.0`](https://github.com/transitive-bullshit/chatgpt-api/releases/tag/v1.4.0)
- Edge runtimes like CF workers and Vercel edge functions
- Modern browsers
-  - Mainly meant for chrome extensions where your code is protected to a degree
+- Node.js >= 18
+  - Node.js 17, 16, and 14 were supported in earlier versions, but OpenAI's Cloudflare update caused a bug with `undici` on v17 and v16 that we need to debug. So for now, use `node >= 18`
  - We recommend against using `chatgpt` from client-side browser code because it would expose your private session token
  - If you want to build a website using `chatgpt`, we recommend using it only from your backend API

--- a/src/chatgpt-api.ts
+++ b/src/chatgpt-api.ts
@ -29,6 +29,9 @@ export class ChatGPTAPI {
  /**
   * Creates a new client wrapper around the unofficial ChatGPT REST API.
   *
+   * Note that your IP address and `userAgent` must match the same values that you used
+   * to obtain your `clearanceToken`.
+   *
   * @param opts.sessionToken = **Required** OpenAI session token which can be found in a valid session's cookies (see readme for instructions)
   * @param opts.clearanceToken = **Required** Cloudflare `cf_clearance` cookie value (see readme for instructions)
   * @param apiBaseUrl - Optional override; the base URL for ChatGPT webapp's API (`/api`)
@ -124,6 +127,21 @@ export class ChatGPTAPI {
    return this._user
  }

+  /** Gets the current session token. */
+  get sessionToken() {
+    return this._sessionToken
+  }
+
+  /** Gets the current Cloudflare clearance token (`cf_clearance` cookie value). */
+  get clearanceToken() {
+    return this._clearanceToken
+  }
+
+  /** Gets the current user agent. */
+  get userAgent() {
+    return this._userAgent
+  }
+
  /**
   * Sends a message to ChatGPT, waits for the response to resolve, and returns
   * the response.
@ -244,7 +262,23 @@ export class ChatGPTAPI {
            reject(err)
          }
        }
-      }).catch(reject)
+      }).catch((err) => {
+        const errMessageL = err.toString().toLowerCase()
+
+        if (
+          response &&
+          (errMessageL === 'error: typeerror: terminated' ||
+            errMessageL === 'typeerror: terminated')
+        ) {
+          // OpenAI sometimes forcefully terminates the socket from their end before
+          // the HTTP request has resolved cleanly. In my testing, these cases tend to
+          // happen when OpenAI has already sent the last `response`, so we can ignore
+          // the `fetch` error in this case.
+          return resolve(response)
+        } else {
+          return reject(err)
+        }
+      })
    })

    if (timeoutMs) {
--- a/src/fetch.ts
+++ b/src/fetch.ts
@ -1,28 +1,13 @@
 /// <reference lib="dom" />

-let _undici: any
-
-// Use `undici` for node.js 16 and 17
 // Use `fetch` for node.js >= 18
 // Use `fetch` for all other environments, including browsers
-// NOTE: The top-level await is removed in a `postbuild` npm script for the
-// browser build
-const fetch =
-  globalThis.fetch ??
-  async function undiciFetchWrapper(
-    ...args: Parameters<typeof globalThis.fetch>
-  ): Promise<Response> {
-    if (!_undici) {
-      _undici = await import('undici')
-    }
+const fetch = globalThis.fetch

-    if (typeof _undici?.fetch !== 'function') {
-      throw new Error(
-        'Invalid undici installation; please make sure undici is installed correctly in your node_modules. Note that this package requires Node.js >= 16.8'
-      )
-    }
-
-    return _undici.fetch(...args)
-  }
+if (typeof fetch !== 'function') {
+  throw new Error(
+    'Invalid environment: global fetch not defined; `chatgpt` requires Node.js >= 18 at the moment due to Cloudflare protections'
+  )
+}

 export { fetch }
--- a/src/index.ts
+++ b/src/index.ts
@ -2,3 +2,4 @@ export * from './chatgpt-api'
 export * from './chatgpt-conversation'
 export * from './types'
 export * from './utils'
+export * from './openai-auth'
--- a/demos/openai-auth-puppeteer.ts
+++ b/demos/openai-auth-puppeteer.ts
@ -10,7 +10,11 @@ import StealthPlugin from 'puppeteer-extra-plugin-stealth'

 puppeteer.use(StealthPlugin())

-export type OpenAIAuthInfo = {
+/**
+ * Represents everything that's required to pass into `ChatGPTAPI` in order
+ * to authenticate with the unofficial ChatGPT API.
+ */
+export type OpenAIAuth = {
  userAgent: string
  clearanceToken: string
  sessionToken: string
@ -20,18 +24,29 @@ export type OpenAIAuthInfo = {
 /**
 * Bypasses OpenAI's use of Cloudflare to get the cookies required to use
 * ChatGPT. Uses Puppeteer with a stealth plugin under the hood.
+ *
+ * If you pass `email` and `password`, then it will log into the account and
+ * include a `sessionToken` in the response.
+ *
+ * If you don't pass `email` and `password`, then it will just return a valid
+ * `clearanceToken`.
+ *
+ * This can be useful because `clearanceToken` expires after ~2 hours, whereas
+ * `sessionToken` generally lasts much longer. We recommend renewing your
+ * `clearanceToken` every hour or so and creating a new instance of `ChatGPTAPI`
+ * with your updated credentials.
 */
-export async function getOpenAIAuthInfo({
+export async function getOpenAIAuth({
  email,
  password,
-  timeout = 2 * 60 * 1000,
+  timeoutMs = 2 * 60 * 1000,
  browser
 }: {
-  email: string
-  password: string
-  timeout?: number
+  email?: string
+  password?: string
+  timeoutMs?: number
  browser?: Browser
-}): Promise<OpenAIAuthInfo> {
+}): Promise<OpenAIAuth> {
  let page: Page
  let origBrowser = browser

@ -42,12 +57,18 @@ export async function getOpenAIAuthInfo({

    const userAgent = await browser.userAgent()
    page = (await browser.pages())[0] || (await browser.newPage())
-    page.setDefaultTimeout(timeout)
+    page.setDefaultTimeout(timeoutMs)

    await page.goto('https://chat.openai.com/auth/login')
-    await page.waitForSelector('#__next .btn-primary', { timeout })
+
+    // NOTE: this is where you may encounter a CAPTCHA
+
+    await page.waitForSelector('#__next .btn-primary', { timeout: timeoutMs })
+
+    // once we get to this point, the Cloudflare cookies are available
    await delay(1000)

+    // login as well (optional)
    if (email && password) {
      await Promise.all([
        page.click('#__next .btn-primary'),
@ -73,7 +94,7 @@ export async function getOpenAIAuthInfo({
      {}
    )

-    const authInfo: OpenAIAuthInfo = {
+    const authInfo: OpenAIAuth = {
      userAgent,
      clearanceToken: cookies['cf_clearance']?.value,
      sessionToken: cookies['__Secure-next-auth.session-token']?.value,
@ -83,7 +104,7 @@ export async function getOpenAIAuthInfo({
    return authInfo
  } catch (err) {
    console.error(err)
-    throw null
+    throw err
  } finally {
    if (origBrowser) {
      if (page) {
@ -98,6 +119,11 @@ export async function getOpenAIAuthInfo({
  }
 }

+/**
+ * Launches a non-puppeteer instance of Chrome. Note that in my testing, I wasn't
+ * able to use the built-in `puppeteer` version of Chromium because Cloudflare
+ * recognizes it and blocks access.
+ */
 export async function getBrowser(launchOptions?: PuppeteerLaunchOptions) {
  const macChromePath =
    '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'