feat: improve browser version with conversation support

pull/148/head
Travis Fischer 2022-12-16 00:28:30 -06:00
parent 9aeb17e7f7
commit ad7853a096
6 changed files with 525 additions and 109 deletions

View file

@@ -1,3 +1,4 @@
import delay from 'delay'
import dotenv from 'dotenv-safe'
import { oraPromise } from 'ora'
@@ -16,7 +17,12 @@ async function main() {
const email = process.env.OPENAI_EMAIL
const password = process.env.OPENAI_PASSWORD
const api = new ChatGPTAPIBrowser({ email, password, debug: true })
const api = new ChatGPTAPIBrowser({
email,
password,
debug: false,
minimize: true
})
await api.init()
const prompt =

View file

@@ -24,7 +24,7 @@ const response = await api.sendMessage('Hello World!')
Note that this solution is not lightweight, but it does work a lot more consistently than the REST API-based versions. I'm currently using this solution to power 10 OpenAI accounts concurrently across 10 minimized Chrome windows for my [Twitter bot](https://github.com/transitive-bullshit/chatgpt-twitter-bot). 😂
If you get a "ChatGPT is at capacity" error when logging in, note that this is also happening quite frequently on the official webapp. Their servers are overloaded, and we're all trying our best to offer access to this amazing technology.
If you get a "ChatGPT is at capacity" error when logging in, note that this can happen on the official webapp as well. Their servers can get overloaded at times, and we're all trying our best to offer access to this amazing technology.
To use the updated version, **make sure you're using the latest version of this package and Node.js >= 18**. Then update your code following the examples below, paying special attention to the sections on [Authentication](#authentication) and [Restrictions](#restrictions).
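
As a rough illustration of the conversation support this commit adds, here is a minimal sketch of how a caller might thread `conversationId` / `parentMessageId` through consecutive `sendMessage` calls. It assumes the package is consumed as `chatgpt` and tracks the ids via the new `onConversationResponse` callback shown in the diff below; treat it as an illustration, not official usage.

```ts
import { ChatGPTAPIBrowser } from 'chatgpt'

async function main() {
  const api = new ChatGPTAPIBrowser({
    email: process.env.OPENAI_EMAIL!,
    password: process.env.OPENAI_PASSWORD!,
    minimize: true
  })
  await api.init()

  // Track the ids reported by onConversationResponse so the second call
  // continues the same ChatGPT conversation instead of starting a new one.
  let conversationId: string | undefined
  let parentMessageId: string | undefined

  const haiku = await api.sendMessage('Write a haiku about TypeScript.', {
    onConversationResponse: (res) => {
      conversationId = res.conversation_id
      parentMessageId = res.message?.id
    }
  })
  console.log(haiku)

  const followUp = await api.sendMessage('Now make it rhyme.', {
    conversationId,
    parentMessageId
  })
  console.log(followUp)
}

main().catch(console.error)
```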

View file

@@ -1,16 +1,23 @@
import delay from 'delay'
import html2md from 'html-to-md'
import pTimeout from 'p-timeout'
import type { Browser, HTTPRequest, HTTPResponse, Page } from 'puppeteer'
import { v4 as uuidv4 } from 'uuid'
import * as types from './types'
import { getBrowser, getOpenAIAuth } from './openai-auth'
import { isRelevantRequest, maximizePage, minimizePage } from './utils'
import {
browserPostEventStream,
isRelevantRequest,
maximizePage,
minimizePage
} from './utils'
export class ChatGPTAPIBrowser {
protected _markdown: boolean
protected _debug: boolean
protected _minimize: boolean
protected _isGoogleLogin: boolean
protected _captchaToken: string
protected _accessToken: string
protected _email: string
protected _password: string
@@ -32,6 +39,7 @@ export class ChatGPTAPIBrowser {
debug?: boolean
isGoogleLogin?: boolean
minimize?: boolean
captchaToken?: string
}) {
const {
@@ -40,6 +48,7 @@ export class ChatGPTAPIBrowser {
markdown = true,
debug = false,
isGoogleLogin = false,
minimize = true,
captchaToken
} = opts
@@ -49,6 +58,7 @@ export class ChatGPTAPIBrowser {
this._markdown = !!markdown
this._debug = !!debug
this._isGoogleLogin = !!isGoogleLogin
this._minimize = !!minimize
this._captchaToken = captchaToken
}
@@ -64,6 +74,9 @@ export class ChatGPTAPIBrowser {
this._page =
(await this._browser.pages())[0] || (await this._browser.newPage())
this._page.on('request', this._onRequest.bind(this))
this._page.on('response', this._onResponse.bind(this))
// bypass cloudflare and login
await getOpenAIAuth({
email: this._email,
@@ -114,10 +127,9 @@ export class ChatGPTAPIBrowser {
return false
}
await minimizePage(this._page)
this._page.on('request', this._onRequest.bind(this))
this._page.on('response', this._onResponse.bind(this))
if (this._minimize) {
await minimizePage(this._page)
}
return true
}
@@ -197,6 +209,13 @@ export class ChatGPTAPIBrowser {
} else if (url.endsWith('api/auth/session')) {
if (status === 403) {
await this.handle403Error()
} else {
const session: types.SessionResult = body
console.log('ACCESS TOKEN', session.accessToken)
if (session?.accessToken) {
this._accessToken = session.accessToken
}
}
}
}
@@ -209,7 +228,9 @@ export class ChatGPTAPIBrowser {
waitUntil: 'networkidle2',
timeout: 2 * 60 * 1000 // 2 minutes
})
await minimizePage(this._page)
if (this._minimize) {
await minimizePage(this._page)
}
} catch (err) {
console.error(
`ChatGPT "${this._email}" error refreshing session`,
@@ -228,121 +249,181 @@ export class ChatGPTAPIBrowser {
}
}
async getLastMessage(): Promise<string | null> {
const messages = await this.getMessages()
// async getLastMessage(): Promise<string | null> {
// const messages = await this.getMessages()
if (messages) {
return messages[messages.length - 1]
} else {
return null
}
}
// if (messages) {
// return messages[messages.length - 1]
// } else {
// return null
// }
// }
async getPrompts(): Promise<string[]> {
// Get all prompts
const messages = await this._page.$$(
'.text-base:has(.whitespace-pre-wrap):not(:has(button:nth-child(2))) .whitespace-pre-wrap'
)
// async getPrompts(): Promise<string[]> {
// // Get all prompts
// const messages = await this._page.$$(
// '.text-base:has(.whitespace-pre-wrap):not(:has(button:nth-child(2))) .whitespace-pre-wrap'
// )
// Prompts are always plaintext
return Promise.all(messages.map((a) => a.evaluate((el) => el.textContent)))
}
// // Prompts are always plaintext
// return Promise.all(messages.map((a) => a.evaluate((el) => el.textContent)))
// }
async getMessages(): Promise<string[]> {
// Get all complete messages
// (in-progress messages that are being streamed back don't contain action buttons)
const messages = await this._page.$$(
'.text-base:has(.whitespace-pre-wrap):has(button:nth-child(2)) .whitespace-pre-wrap'
)
// async getMessages(): Promise<string[]> {
// // Get all complete messages
// // (in-progress messages that are being streamed back don't contain action buttons)
// const messages = await this._page.$$(
// '.text-base:has(.whitespace-pre-wrap):has(button:nth-child(2)) .whitespace-pre-wrap'
// )
if (this._markdown) {
const htmlMessages = await Promise.all(
messages.map((a) => a.evaluate((el) => el.innerHTML))
)
// if (this._markdown) {
// const htmlMessages = await Promise.all(
// messages.map((a) => a.evaluate((el) => el.innerHTML))
// )
const markdownMessages = htmlMessages.map((messageHtml) => {
// parse markdown from message HTML
messageHtml = messageHtml
.replaceAll('Copy code</button>', '</button>')
.replace(/Copy code\s*<\/button>/gim, '</button>')
// const markdownMessages = htmlMessages.map((messageHtml) => {
// // parse markdown from message HTML
// messageHtml = messageHtml
// .replaceAll('Copy code</button>', '</button>')
// .replace(/Copy code\s*<\/button>/gim, '</button>')
return html2md(messageHtml, {
ignoreTags: [
'button',
'svg',
'style',
'form',
'noscript',
'script',
'meta',
'head'
],
skipTags: ['button', 'svg']
})
})
// return html2md(messageHtml, {
// ignoreTags: [
// 'button',
// 'svg',
// 'style',
// 'form',
// 'noscript',
// 'script',
// 'meta',
// 'head'
// ],
// skipTags: ['button', 'svg']
// })
// })
return markdownMessages
} else {
// plaintext
const plaintextMessages = await Promise.all(
messages.map((a) => a.evaluate((el) => el.textContent))
)
return plaintextMessages
}
}
// return markdownMessages
// } else {
// // plaintext
// const plaintextMessages = await Promise.all(
// messages.map((a) => a.evaluate((el) => el.textContent))
// )
// return plaintextMessages
// }
// }
async sendMessage(
message: string,
opts: {
timeoutMs?: number
} = {}
opts: types.SendMessageOptions = {}
): Promise<string> {
const { timeoutMs } = opts
const {
conversationId,
parentMessageId = uuidv4(),
messageId = uuidv4(),
action = 'next',
// TODO
// timeoutMs,
// onProgress,
onConversationResponse
} = opts
const inputBox = await this._getInputBox()
if (!inputBox) throw new Error('not signed in')
const lastMessage = await this.getLastMessage()
await inputBox.focus()
const paragraphs = message.split('\n')
for (let i = 0; i < paragraphs.length; i++) {
await inputBox.type(paragraphs[i], { delay: 0 })
if (i < paragraphs.length - 1) {
await this._page.keyboard.down('Shift')
await inputBox.press('Enter')
await this._page.keyboard.up('Shift')
} else {
await inputBox.press('Enter')
}
if (!inputBox || !this._accessToken) {
const error = new types.ChatGPTError('Not signed in')
error.statusCode = 401
throw error
}
const responseP = new Promise<string>(async (resolve, reject) => {
try {
do {
await delay(1000)
// TODO: this logic needs some work because we can have repeat messages...
const newLastMessage = await this.getLastMessage()
if (
newLastMessage &&
lastMessage?.toLowerCase() !== newLastMessage?.toLowerCase()
) {
return resolve(newLastMessage)
const url = `https://chat.openai.com/backend-api/conversation`
const body: types.ConversationJSONBody = {
action,
messages: [
{
id: messageId,
role: 'user',
content: {
content_type: 'text',
parts: [message]
}
} while (true)
} catch (err) {
return reject(err)
}
})
if (timeoutMs) {
return pTimeout(responseP, {
milliseconds: timeoutMs
})
} else {
return responseP
}
],
model: 'text-davinci-002-render',
parent_message_id: parentMessageId
}
if (conversationId) {
body.conversation_id = conversationId
}
// console.log('>>> EVALUATE', url, this._accessToken, body)
const result = await this._page.evaluate(
browserPostEventStream,
url,
this._accessToken,
body
)
// console.log('<<< EVALUATE', result)
if (result.error) {
const error = new types.ChatGPTError(result.error.message)
error.statusCode = result.error.statusCode
error.statusText = result.error.statusText
if (error.statusCode === 403) {
await this.handle403Error()
}
throw error
}
// TODO: support sending partial response events
if (onConversationResponse) {
onConversationResponse(result.conversationResponse)
}
return result.response
// const lastMessage = await this.getLastMessage()
// await inputBox.focus()
// const paragraphs = message.split('\n')
// for (let i = 0; i < paragraphs.length; i++) {
// await inputBox.type(paragraphs[i], { delay: 0 })
// if (i < paragraphs.length - 1) {
// await this._page.keyboard.down('Shift')
// await inputBox.press('Enter')
// await this._page.keyboard.up('Shift')
// } else {
// await inputBox.press('Enter')
// }
// }
// const responseP = new Promise<string>(async (resolve, reject) => {
// try {
// do {
// await delay(1000)
// // TODO: this logic needs some work because we can have repeat messages...
// const newLastMessage = await this.getLastMessage()
// if (
// newLastMessage &&
// lastMessage?.toLowerCase() !== newLastMessage?.toLowerCase()
// ) {
// return resolve(newLastMessage)
// }
// } while (true)
// } catch (err) {
// return reject(err)
// }
// })
// if (timeoutMs) {
// return pTimeout(responseP, {
// milliseconds: timeoutMs
// })
// } else {
// return responseP
// }
}
async resetThread() {
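
Worth noting for reviewers: the rewritten `sendMessage` above no longer types into the textarea. Instead it calls `this._page.evaluate(browserPostEventStream, url, this._accessToken, body)`, so the POST to `/backend-api/conversation` runs inside the already-authenticated page. Because puppeteer serializes the function and its arguments into the page, the injected function has to be fully self-contained. A toy sketch of that pattern (with a hypothetical endpoint and a stand-in function, not the real `browserPostEventStream`):

```ts
import puppeteer from 'puppeteer'

// Any function passed to page.evaluate is stringified and re-created inside
// the page, so it must not close over Node-side imports or variables.
async function fetchJsonInPage(url: string, accessToken: string) {
  const res = await fetch(url, {
    headers: { authorization: `Bearer ${accessToken}` }
  })
  return { status: res.status, body: await res.json() }
}

async function demo() {
  const browser = await puppeteer.launch()
  const page = await browser.newPage()
  await page.goto('https://example.com')

  // Arguments must be serializable; the resolved return value is
  // serialized back out of the page to Node.
  const result = await page.evaluate(
    fetchJsonInPage,
    'https://example.com/api', // hypothetical endpoint
    'fake-token'
  )
  console.log(result)

  await browser.close()
}

demo().catch(console.error)
```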

View file

@@ -127,7 +127,7 @@ export async function getOpenAIAuth({
}
await page.click('button[type="submit"]')
await page.waitForSelector('#password')
await page.waitForSelector('#password', { timeout: timeoutMs })
await page.type('#password', password, { delay: 10 })
submitP = () => page.click('button[type="submit"]')
}

View file

@@ -297,3 +297,19 @@ export class ChatGPTError extends Error {
response?: Response
originalError?: Error
}
export type ChatError = {
error: { message: string; statusCode?: number; statusText?: string }
response: null
conversationId?: string
messageId?: string
conversationResponse?: ConversationResponseEvent
}
export type ChatResponse = {
error: null
response: string
conversationId: string
messageId: string
conversationResponse?: ConversationResponseEvent
}
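
Since `ChatError` and `ChatResponse` differ only in which of `error` / `response` is null, callers can narrow the union returned by `browserPostEventStream` with a plain truthiness check. A small illustrative sketch (assuming the types are imported from this `types` module, as elsewhere in the codebase):

```ts
import * as types from './types'

function unwrapChatResult(
  result: types.ChatError | types.ChatResponse
): string {
  if (result.error) {
    // Narrowed to ChatError: response is null, error carries the details
    throw new Error(
      `ChatGPT error ${result.error.statusCode ?? ''} ${result.error.message}`
    )
  }
  // Narrowed to ChatResponse: response is the assistant's full reply text
  return result.response
}
```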

View file

@@ -1,7 +1,13 @@
import type {
EventSourceParseCallback,
EventSourceParser
} from 'eventsource-parser'
import type { Page } from 'puppeteer'
import { remark } from 'remark'
import stripMarkdown from 'strip-markdown'
import * as types from './types'
export function markdownToText(markdown?: string): string {
return remark()
.use(stripMarkdown)
@@ -44,7 +50,10 @@ export function isRelevantRequest(url: string): boolean {
return false
}
if (pathname.startsWith('/_next')) {
if (
!pathname.startsWith('/backend-api/') &&
!pathname.startsWith('/api/auth/session')
) {
return false
}
@@ -54,3 +63,307 @@ export function isRelevantRequest(url: string): boolean {
return true
}
/**
* This function is injected into the ChatGPT webapp page using puppeteer. It
* has to be fully self-contained, so we copied a few third-party sources and
* included them in here.
*/
export async function browserPostEventStream(
url: string,
accessToken: string,
body: types.ConversationJSONBody
): Promise<types.ChatError | types.ChatResponse> {
const BOM = [239, 187, 191]
// Workaround for https://github.com/esbuild-kit/tsx/issues/113
globalThis.__name = () => undefined
let conversationId: string = body?.conversation_id
let messageId: string = body?.messages?.[0]?.id
let response = ''
try {
console.log('browserPostEventStream', url, accessToken, body)
const res = await fetch(url, {
method: 'POST',
body: JSON.stringify(body),
headers: {
accept: 'text/event-stream',
'x-openai-assistant-app-id': '',
authorization: `Bearer ${accessToken}`,
'content-type': 'application/json'
}
})
console.log('EVENT', res)
if (!res.ok) {
return {
error: {
message: `ChatGPTAPI error ${res.status || res.statusText}`,
statusCode: res.status,
statusText: res.statusText
},
response: null,
conversationId,
messageId
}
}
return await new Promise<types.ChatResponse>(async (resolve, reject) => {
function onMessage(data: string) {
if (data === '[DONE]') {
return resolve({
error: null,
response,
conversationId,
messageId
})
}
try {
const parsedData: types.ConversationResponseEvent = JSON.parse(data)
if (parsedData.conversation_id) {
conversationId = parsedData.conversation_id
}
if (parsedData.message?.id) {
messageId = parsedData.message.id
}
const partialResponse = parsedData.message?.content?.parts?.[0]
if (partialResponse) {
response = partialResponse
}
} catch (err) {
console.warn('fetchSSE onMessage unexpected error', err)
reject(err)
}
}
const parser = createParser((event) => {
if (event.type === 'event') {
onMessage(event.data)
}
})
for await (const chunk of streamAsyncIterable(res.body)) {
const str = new TextDecoder().decode(chunk)
parser.feed(str)
}
})
} catch (err) {
const errMessageL = err.toString().toLowerCase()
if (
response &&
(errMessageL === 'error: typeerror: terminated' ||
errMessageL === 'typeerror: terminated')
) {
// OpenAI sometimes forcefully terminates the socket from their end before
// the HTTP request has resolved cleanly. In my testing, these cases tend to
// happen when OpenAI has already sent the last `response`, so we can ignore
// the `fetch` error in this case.
return {
error: null,
response,
conversationId,
messageId
}
}
return {
error: {
message: err.toString(),
statusCode: err.statusCode || err.status || err.response?.statusCode,
statusText: err.statusText || err.response?.statusText
},
response: null,
conversationId,
messageId
}
}
async function* streamAsyncIterable<T>(stream: ReadableStream<T>) {
const reader = stream.getReader()
try {
while (true) {
const { done, value } = await reader.read()
if (done) {
return
}
yield value
}
} finally {
reader.releaseLock()
}
}
function createParser(onParse: EventSourceParseCallback): EventSourceParser {
// Processing state
let isFirstChunk: boolean
let buffer: string
let startingPosition: number
let startingFieldLength: number
// Event state
let eventId: string | undefined
let eventName: string | undefined
let data: string
reset()
return { feed, reset }
function reset(): void {
isFirstChunk = true
buffer = ''
startingPosition = 0
startingFieldLength = -1
eventId = undefined
eventName = undefined
data = ''
}
function feed(chunk: string): void {
buffer = buffer ? buffer + chunk : chunk
// Strip any UTF8 byte order mark (BOM) at the start of the stream.
// Note that we do not strip any non-UTF8 BOM, as eventsource streams are
// always decoded as UTF8 as per the specification.
if (isFirstChunk && hasBom(buffer)) {
buffer = buffer.slice(BOM.length)
}
isFirstChunk = false
// Set up chunk-specific processing state
const length = buffer.length
let position = 0
let discardTrailingNewline = false
// Read the current buffer byte by byte
while (position < length) {
// EventSource allows for carriage return + line feed, which means we
// need to ignore a linefeed character if the previous character was a
// carriage return
// @todo refactor to reduce nesting, consider checking previous byte?
// @todo but consider multiple chunks etc
if (discardTrailingNewline) {
if (buffer[position] === '\n') {
++position
}
discardTrailingNewline = false
}
let lineLength = -1
let fieldLength = startingFieldLength
let character: string
for (
let index = startingPosition;
lineLength < 0 && index < length;
++index
) {
character = buffer[index]
if (character === ':' && fieldLength < 0) {
fieldLength = index - position
} else if (character === '\r') {
discardTrailingNewline = true
lineLength = index - position
} else if (character === '\n') {
lineLength = index - position
}
}
if (lineLength < 0) {
startingPosition = length - position
startingFieldLength = fieldLength
break
} else {
startingPosition = 0
startingFieldLength = -1
}
parseEventStreamLine(buffer, position, fieldLength, lineLength)
position += lineLength + 1
}
if (position === length) {
// If we consumed the entire buffer to read the event, reset the buffer
buffer = ''
} else if (position > 0) {
// If there are bytes left to process, set the buffer to the unprocessed
// portion of the buffer only
buffer = buffer.slice(position)
}
}
function parseEventStreamLine(
lineBuffer: string,
index: number,
fieldLength: number,
lineLength: number
) {
if (lineLength === 0) {
// We reached the last line of this event
if (data.length > 0) {
onParse({
type: 'event',
id: eventId,
event: eventName || undefined,
data: data.slice(0, -1) // remove trailing newline
})
data = ''
eventId = undefined
}
eventName = undefined
return
}
const noValue = fieldLength < 0
const field = lineBuffer.slice(
index,
index + (noValue ? lineLength : fieldLength)
)
let step = 0
if (noValue) {
step = lineLength
} else if (lineBuffer[index + fieldLength + 1] === ' ') {
step = fieldLength + 2
} else {
step = fieldLength + 1
}
const position = index + step
const valueLength = lineLength - step
const value = lineBuffer
.slice(position, position + valueLength)
.toString()
if (field === 'data') {
data += value ? `${value}\n` : '\n'
} else if (field === 'event') {
eventName = value
} else if (field === 'id' && !value.includes('\u0000')) {
eventId = value
} else if (field === 'retry') {
const retry = parseInt(value, 10)
if (!Number.isNaN(retry)) {
onParse({ type: 'reconnect-interval', value: retry })
}
}
}
}
function hasBom(buffer: string) {
return BOM.every(
(charCode: number, index: number) => buffer.charCodeAt(index) === charCode
)
}
}
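
The `createParser` above is a trimmed copy of the parser from the `eventsource-parser` package (whose types are imported at the top of this file), inlined so the injected function stays fully self-contained. Outside the page, the same parsing pattern could be exercised directly against the upstream package, roughly like this sketch, assuming the upstream API matches the copy bundled here:

```ts
import { createParser } from 'eventsource-parser'

const parser = createParser((event) => {
  if (event.type === 'event') {
    if (event.data === '[DONE]') {
      console.log('stream finished')
      return
    }
    // Each SSE "data:" payload from /backend-api/conversation is JSON with
    // the partial assistant message accumulated so far.
    const parsed = JSON.parse(event.data)
    console.log(parsed.message?.content?.parts?.[0])
  }
})

// Chunks may arrive split at arbitrary boundaries; feed() buffers partial
// lines until a complete event (terminated by a blank line) is available.
parser.feed('data: {"message":{"content":{"parts":["Hel')
parser.feed('lo"]}},"conversation_id":"abc"}\n\n')
parser.feed('data: [DONE]\n\n')
```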