feat: move puppeteer automation into main lib; update docs

pull/108/head
Travis Fischer 2022-12-12 11:23:03 -06:00
rodzic c1634b0537
commit 1d621d0c3c
8 zmienionych plików z 129 dodań i 103 usunięć

Wyświetl plik

@ -1,8 +1,7 @@
import dotenv from 'dotenv-safe'
import { oraPromise } from 'ora'
import { ChatGPTAPI } from '../src'
import { getOpenAIAuthInfo } from './openai-auth-puppeteer'
import { ChatGPTAPI, getOpenAIAuth } from '../src'
dotenv.config()
@ -17,7 +16,7 @@ async function main() {
const email = process.env.EMAIL
const password = process.env.PASSWORD
const authInfo = await getOpenAIAuthInfo({
const authInfo = await getOpenAIAuth({
email,
password
})

Wyświetl plik

@ -1,8 +1,7 @@
import dotenv from 'dotenv-safe'
import { oraPromise } from 'ora'
import { ChatGPTAPI } from '../src'
import { getOpenAIAuthInfo } from './openai-auth-puppeteer'
import { ChatGPTAPI, getOpenAIAuth } from '../src'
dotenv.config()
@ -17,7 +16,7 @@ async function main() {
const email = process.env.EMAIL
const password = process.env.PASSWORD
const authInfo = await getOpenAIAuthInfo({
const authInfo = await getOpenAIAuth({
email,
password
})

Wyświetl plik

@ -20,14 +20,13 @@
"build"
],
"engines": {
"node": ">=16.8"
"node": ">=18"
},
"scripts": {
"build": "tsup",
"dev": "tsup --watch",
"clean": "del build",
"prebuild": "run-s clean",
"postbuild": "[ -n CI ] && sed -i '' 's/await import(\"undici\")/null/' build/browser/index.js || echo 'skipping postbuild on CI'",
"predev": "run-s clean",
"pretest": "run-s build",
"docs": "typedoc",
@ -42,7 +41,10 @@
"p-timeout": "^6.0.0",
"remark": "^14.0.2",
"strip-markdown": "^5.0.0",
"uuid": "^9.0.0"
"delay": "^5.0.0",
"uuid": "^9.0.0",
"puppeteer-extra": "^3.3.4",
"puppeteer-extra-plugin-stealth": "^2.11.1"
},
"devDependencies": {
"@trivago/prettier-plugin-sort-imports": "^4.0.0",
@ -50,7 +52,6 @@
"@types/uuid": "^9.0.0",
"ava": "^5.1.0",
"del-cli": "^5.0.0",
"delay": "^5.0.0",
"dotenv-safe": "^8.2.0",
"husky": "^8.0.2",
"lint-staged": "^13.0.3",
@ -58,16 +59,14 @@
"ora": "^6.1.2",
"prettier": "^2.8.0",
"puppeteer": "^19.4.0",
"puppeteer-extra": "^3.3.4",
"puppeteer-extra-plugin-stealth": "^2.11.1",
"tsup": "^6.5.0",
"tsx": "^3.12.1",
"typedoc": "^0.23.21",
"typedoc-plugin-markdown": "^3.13.6",
"typescript": "^4.9.3"
},
"optionalDependencies": {
"undici": "^5.13.0"
"peerDependencies": {
"puppeteer": "*"
},
"lint-staged": {
"*.{ts,tsx}": [

Wyświetl plik

@ -4,28 +4,9 @@ Yesterday, OpenAI added additional Cloudflare protections that make it more diff
The demos have been updated to use Puppeteer to log in to ChatGPT and extract the Cloudflare `cf_clearance` cookie and OpenAI session token. 🔥
To use the updated version, first make sure you're using the latest version of this package and Node.js >= 18:
To use the updated version, make sure you're using the latest version of this package and Node.js >= 18. Then update your code to use the examples below, paying special attention to the sections on [Authentication](#authentication) and [Restrictions](#restrictions).
```ts
const api = new ChatGPTAPI({
sessionToken: process.env.SESSION_TOKEN,
clearanceToken: process.env.CLEARANCE_TOKEN,
userAgent: '' // needs to match your browser's user agent
})
await api.ensureAuth()
```
Restrictions on this method:
- Cloudflare `cf_clearance` **tokens expire after 2 hours**, so right now we recommend that you refresh your `cf_clearance` token every ~45 minutes or so.
- Your `user-agent` and `IP address` **must match** from the real browser window you're logged in with to the one you're using for `ChatGPTAPI`.
- This means that you currently can't log in with your laptop and then run the bot on a server or proxy somewhere.
- Cloudflare will still sometimes ask you to complete a CAPTCHA, so you may need to keep an eye on it and manually resolve the CAPTCHA. Automated CAPTCHA bypass is a WIP.
- You must use `node >= 18`. I'm using `v19.2.0` in my testing, but for some reason, all `fetch` requests using Node.js `v16` and `v17` fail at the moment (these use `undici` under the hood, whereas Node.js v18 and above use a built-in `fetch` based on `undici`).
- You should not be using this account while the bot is using it, because that browser window may refresh one of your tokens and invalidate the bot's session.
We're working hard in [this issue](https://github.com/transitive-bullshit/chatgpt-api/issues/96) to make this process easier and more automated.
We're working hard in [this issue](https://github.com/transitive-bullshit/chatgpt-api/issues/96) to improve this process. Keep in mind that this package will be updated to use the official API as soon as it's released. 💪
Cheers,
Travis
@ -48,7 +29,8 @@ Travis
- [Usage](#usage)
- [Docs](#docs)
- [Demos](#demos)
- [Session Tokens](#session-tokens)
- [Authentication](#authentication)
- [Restrictions](#restrictions)
- [Projects](#projects)
- [Compatibility](#compatibility)
- [Credits](#credits)
@ -69,15 +51,17 @@ npm install chatgpt
## Usage
```ts
import { ChatGPTAPI } from 'chatgpt'
import { ChatGPTAPI, getOpenAIAuth } from 'chatgpt'
async function example() {
const api = new ChatGPTAPI({
sessionToken: process.env.SESSION_TOKEN,
clearanceToken: process.env.CLEARANCE_TOKEN,
userAgent: 'TODO'
// uses puppeteer to bypass cloudflare (headful because you may have to solve
// a captcha)
const openAIAuth = await getOpenAIAuth({
email: process.env.EMAIL,
password: process.env.PASSWORD
})
const api = new ChatGPTAPI({ ...openAIAuth })
await api.ensureAuth()
// send a message and wait for the response
@ -93,32 +77,23 @@ async function example() {
ChatGPT responses are formatted as markdown by default. If you want to work with plaintext instead, you can use:
```ts
const api = new ChatGPTAPI({
sessionToken: process.env.SESSION_TOKEN,
clearanceToken: process.env.CLEARANCE_TOKEN,
userAgent: 'TODO',
markdown: false
})
const api = new ChatGPTAPI({ ...openAIAuth, markdown: false })
```
If you want to automatically track the conversation, you can use `ChatGPTAPI.getConversation()`:
```ts
const api = new ChatGPTAPI({
sessionToken: process.env.SESSION_TOKEN,
clearanceToken: process.env.CLEARANCE_TOKEN,
userAgent: 'TODO'
})
const api = new ChatGPTAPI({ ...openAIAuth, markdown: false })
const conversation = api.getConversation()
// send a message and wait for the response
const response0 = await conversation.sendMessage('What is OpenAI?')
// send a follow-up prompt to the previous message and wait for the response
// send a follow-up
const response1 = await conversation.sendMessage('Can you expand on that?')
// send another follow-up to the same conversation
// send another follow-up
const response2 = await conversation.sendMessage('Oh cool; thank you')
```
@ -141,13 +116,14 @@ You can stream responses using the `onProgress` or `onConversationResponse` call
```js
async function example() {
// To use ESM in CommonJS, you can use a dynamic import
const { ChatGPTAPI } = await import('chatgpt')
const { ChatGPTAPI, getOpenAIAuth } = await import('chatgpt')
const api = new ChatGPTAPI({
sessionToken: process.env.SESSION_TOKEN,
clearanceToken: process.env.CLEARANCE_TOKEN,
userAgent: 'TODO'
const openAIAuth = await getOpenAIAuth({
email: process.env.EMAIL,
password: process.env.PASSWORD
})
const api = new ChatGPTAPI({ ...openAIAuth })
await api.ensureAuth()
const response = await api.sendMessage('Hello World!')
@ -181,13 +157,21 @@ A [conversation demo](./demos/demo-conversation.ts) is also included:
npx tsx demos/demo-conversation.ts
```
### Session Tokens
### Authentication
**This package requires a valid session token from ChatGPT to access its unofficial REST API.**
#### Restrictions
As of December 11, 2022, it also requires a valid Cloudflare clearance token.
**Please read carefully**
There are two options to get these; either manually, or automated. For the automated way, see the `demos/` folder using Puppeteer.
- You must use `node >= 18`. I'm using `v19.2.0` in my testing, but for some reason, all `fetch` requests using Node.js `v16` and `v17` fail at the moment (these use `undici` under the hood, whereas Node.js v18 and above use a built-in `fetch` based on `undici`).
- Cloudflare `cf_clearance` **tokens expire after 2 hours**, so right now we recommend that you refresh your `cf_clearance` token every hour or so.
- Your `user-agent` and `IP address` **must match** from the real browser window you're logged in with to the one you're using for `ChatGPTAPI`.
- This means that you currently can't log in with your laptop and then run the bot on a server or proxy somewhere.
- Cloudflare will still sometimes ask you to complete a CAPTCHA, so you may need to keep an eye on it and manually resolve the CAPTCHA. Automated CAPTCHA bypass is coming soon.
- You should not be using this account while the bot is using it, because that browser window may refresh one of your tokens and invalidate the bot's session.
<details>
<summary>Getting tokens manually</summary>
To get a session token manually:
@ -195,8 +179,10 @@ To get a session token manually:
2. Open dev tools.
3. Open `Application` > `Cookies`.
![ChatGPT cookies](./media/session-token.png)
4. Copy the value for `__Secure-next-auth.session-token` and save it to your environment.
5. Copy the value for `cf_clearance` and save it to your environment.
4. Copy the value for `__Secure-next-auth.session-token` and save it to your environment. This will be your `sessionToken`.
5. Copy the value for `cf_clearance` and save it to your environment. This will be your `clearanceToken`.
</details>
> **Note**
> This package will switch to using the official API once it's released.
@ -255,11 +241,8 @@ If you create a cool integration, feel free to open a PR and add it to the list.
This package is ESM-only. It supports:
- Node.js >= 16.8
- If you need Node.js 14 support, use [`v1.4.0`](https://github.com/transitive-bullshit/chatgpt-api/releases/tag/v1.4.0)
- Edge runtimes like CF workers and Vercel edge functions
- Modern browsers
- Mainly meant for chrome extensions where your code is protected to a degree
- Node.js >= 18
- Node.js 17, 16, and 14 were supported in earlier versions, but OpenAI's Cloudflare update caused a bug with `undici` on v17 and v16 that we need to debug. So for now, use `node >= 18`
- We recommend against using `chatgpt` from client-side browser code because it would expose your private session token
- If you want to build a website using `chatgpt`, we recommend using it only from your backend API

Wyświetl plik

@ -29,6 +29,9 @@ export class ChatGPTAPI {
/**
* Creates a new client wrapper around the unofficial ChatGPT REST API.
*
* Note that your IP address and `userAgent` must match the same values that you used
* to obtain your `clearanceToken`.
*
* @param opts.sessionToken = **Required** OpenAI session token which can be found in a valid session's cookies (see readme for instructions)
* @param opts.clearanceToken = **Required** Cloudflare `cf_clearance` cookie value (see readme for instructions)
* @param apiBaseUrl - Optional override; the base URL for ChatGPT webapp's API (`/api`)
@ -124,6 +127,21 @@ export class ChatGPTAPI {
return this._user
}
/**
 * Gets the current session token.
 *
 * @returns The value currently held in the private `_sessionToken` field.
 */
get sessionToken() {
return this._sessionToken
}
/**
 * Gets the current Cloudflare clearance token (`cf_clearance` cookie value).
 *
 * @returns The value currently held in the private `_clearanceToken` field.
 */
get clearanceToken() {
return this._clearanceToken
}
/**
 * Gets the current user agent.
 *
 * @returns The value currently held in the private `_userAgent` field.
 */
get userAgent() {
return this._userAgent
}
/**
* Sends a message to ChatGPT, waits for the response to resolve, and returns
* the response.
@ -244,7 +262,23 @@ export class ChatGPTAPI {
reject(err)
}
}
}).catch(reject)
}).catch((err) => {
const errMessageL = err.toString().toLowerCase()
if (
response &&
(errMessageL === 'error: typeerror: terminated' ||
errMessageL === 'typeerror: terminated')
) {
// OpenAI sometimes forcefully terminates the socket from their end before
// the HTTP request has resolved cleanly. In my testing, these cases tend to
// happen when OpenAI has already sent the last `response`, so we can ignore
// the `fetch` error in this case.
return resolve(response)
} else {
return reject(err)
}
})
})
if (timeoutMs) {

Wyświetl plik

@ -1,28 +1,13 @@
/// <reference lib="dom" />
let _undici: any
// Use `undici` for node.js 16 and 17
// Use `fetch` for node.js >= 18
// Use `fetch` for all other environments, including browsers
// NOTE: The top-level await is removed in a `postbuild` npm script for the
// browser build
const fetch =
globalThis.fetch ??
async function undiciFetchWrapper(
...args: Parameters<typeof globalThis.fetch>
): Promise<Response> {
if (!_undici) {
_undici = await import('undici')
}
const fetch = globalThis.fetch
if (typeof _undici?.fetch !== 'function') {
throw new Error(
'Invalid undici installation; please make sure undici is installed correctly in your node_modules. Note that this package requires Node.js >= 16.8'
)
}
return _undici.fetch(...args)
}
if (typeof fetch !== 'function') {
throw new Error(
'Invalid environment: global fetch not defined; `chatgpt` requires Node.js >= 18 at the moment due to Cloudflare protections'
)
}
export { fetch }

Wyświetl plik

@ -2,3 +2,4 @@ export * from './chatgpt-api'
export * from './chatgpt-conversation'
export * from './types'
export * from './utils'
export * from './openai-auth'

Wyświetl plik

@ -10,7 +10,11 @@ import StealthPlugin from 'puppeteer-extra-plugin-stealth'
puppeteer.use(StealthPlugin())
export type OpenAIAuthInfo = {
/**
* Represents everything that's required to pass into `ChatGPTAPI` in order
* to authenticate with the unofficial ChatGPT API.
*/
export type OpenAIAuth = {
userAgent: string
clearanceToken: string
sessionToken: string
@ -20,18 +24,29 @@ export type OpenAIAuthInfo = {
/**
* Bypasses OpenAI's use of Cloudflare to get the cookies required to use
* ChatGPT. Uses Puppeteer with a stealth plugin under the hood.
*
* If you pass `email` and `password`, then it will log into the account and
* include a `sessionToken` in the response.
*
* If you don't pass `email` and `password`, then it will just return a valid
* `clearanceToken`.
*
* This can be useful because `clearanceToken` expires after ~2 hours, whereas
* `sessionToken` generally lasts much longer. We recommend renewing your
* `clearanceToken` every hour or so and creating a new instance of `ChatGPTAPI`
* with your updated credentials.
*/
export async function getOpenAIAuthInfo({
export async function getOpenAIAuth({
email,
password,
timeout = 2 * 60 * 1000,
timeoutMs = 2 * 60 * 1000,
browser
}: {
email: string
password: string
timeout?: number
email?: string
password?: string
timeoutMs?: number
browser?: Browser
}): Promise<OpenAIAuthInfo> {
}): Promise<OpenAIAuth> {
let page: Page
let origBrowser = browser
@ -42,12 +57,18 @@ export async function getOpenAIAuthInfo({
const userAgent = await browser.userAgent()
page = (await browser.pages())[0] || (await browser.newPage())
page.setDefaultTimeout(timeout)
page.setDefaultTimeout(timeoutMs)
await page.goto('https://chat.openai.com/auth/login')
await page.waitForSelector('#__next .btn-primary', { timeout })
// NOTE: this is where you may encounter a CAPTCHA
await page.waitForSelector('#__next .btn-primary', { timeout: timeoutMs })
// once we get to this point, the Cloudflare cookies are available
await delay(1000)
// login as well (optional)
if (email && password) {
await Promise.all([
page.click('#__next .btn-primary'),
@ -73,7 +94,7 @@ export async function getOpenAIAuthInfo({
{}
)
const authInfo: OpenAIAuthInfo = {
const authInfo: OpenAIAuth = {
userAgent,
clearanceToken: cookies['cf_clearance']?.value,
sessionToken: cookies['__Secure-next-auth.session-token']?.value,
@ -83,7 +104,7 @@ export async function getOpenAIAuthInfo({
return authInfo
} catch (err) {
console.error(err)
throw null
throw err
} finally {
if (origBrowser) {
if (page) {
@ -98,6 +119,11 @@ export async function getOpenAIAuthInfo({
}
}
/**
* Launches a non-puppeteer instance of Chrome. Note that in my testing, I wasn't
* able to use the built-in `puppeteer` version of Chromium because Cloudflare
* recognizes it and blocks access.
*/
export async function getBrowser(launchOptions?: PuppeteerLaunchOptions) {
const macChromePath =
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'