feat: improve browser version with conversation support

2022-12-16 00:28:30 -06:00 · 2022-12-16 00:28:30 -06:00 · ad7853a096
commit ad7853a096
--- a/demos/demo.ts
+++ b/demos/demo.ts
@ -1,3 +1,4 @@
 import delay from 'delay'
 import dotenv from 'dotenv-safe'
 import { oraPromise } from 'ora'
@ -16,7 +17,12 @@ async function main() {
  const email = process.env.OPENAI_EMAIL
  const password = process.env.OPENAI_PASSWORD
-  const api = new ChatGPTAPIBrowser({ email, password, debug: true })
+  const api = new ChatGPTAPIBrowser({
    email,
    password,
    debug: false,
    minimize: true
  })
  await api.init()
  const prompt =
--- a/readme.md
+++ b/readme.md
@ -24,7 +24,7 @@ const response = await api.sendMessage('Hello World!')
 Note that this solution is not lightweight, but it does work a lot more consistently than the REST API-based versions. I'm currently using this solution to power 10 OpenAI accounts concurrently across 10 minimized Chrome windows for my [Twitter bot](https://github.com/transitive-bullshit/chatgpt-twitter-bot). 😂
-If you get a "ChatGPT is at capacity" error when logging in, note that this is also happening quite frequently on the official webapp. Their servers are overloaded, and we're all trying our best to offer access to this amazing technology.
+If you get a "ChatGPT is at capacity" error when logging in, note that this can also happen on the official webapp as well. Their servers can get overloaded at times, and we're all trying our best to offer access to this amazing technology.
 To use the updated version, **make sure you're using the latest version of this package and Node.js >= 18**. Then update your code following the examples below, paying special attention to the sections on [Authentication](#authentication) and [Restrictions](#restrictions).
--- a/src/chatgpt-api-browser.ts
+++ b/src/chatgpt-api-browser.ts
@ -1,16 +1,23 @@
 import delay from 'delay'
 import html2md from 'html-to-md'
 import pTimeout from 'p-timeout'
 import type { Browser, HTTPRequest, HTTPResponse, Page } from 'puppeteer'
 import { v4 as uuidv4 } from 'uuid'
 import * as types from './types'
 import { getBrowser, getOpenAIAuth } from './openai-auth'
-import { isRelevantRequest, maximizePage, minimizePage } from './utils'
+import {
  browserPostEventStream,
  isRelevantRequest,
  maximizePage,
  minimizePage
 } from './utils'
 export class ChatGPTAPIBrowser {
  protected _markdown: boolean
  protected _debug: boolean
  protected _minimize: boolean
  protected _isGoogleLogin: boolean
  protected _captchaToken: string
  protected _accessToken: string
  protected _email: string
  protected _password: string
@ -32,6 +39,7 @@ export class ChatGPTAPIBrowser {
    debug?: boolean
    isGoogleLogin?: boolean
    minimize?: boolean
    captchaToken?: string
  }) {
    const {
@ -40,6 +48,7 @@ export class ChatGPTAPIBrowser {
      markdown = true,
      debug = false,
      isGoogleLogin = false,
      minimize = true,
      captchaToken
    } = opts
@ -49,6 +58,7 @@ export class ChatGPTAPIBrowser {
    this._markdown = !!markdown
    this._debug = !!debug
    this._isGoogleLogin = !!isGoogleLogin
    this._minimize = !!minimize
    this._captchaToken = captchaToken
  }
@ -64,6 +74,9 @@ export class ChatGPTAPIBrowser {
      this._page =
        (await this._browser.pages())[0] || (await this._browser.newPage())
      this._page.on('request', this._onRequest.bind(this))
      this._page.on('response', this._onResponse.bind(this))
      // bypass cloudflare and login
      await getOpenAIAuth({
        email: this._email,
@ -114,10 +127,9 @@ export class ChatGPTAPIBrowser {
      return false
    }
-    await minimizePage(this._page)
+    if (this._minimize) {
-
+      await minimizePage(this._page)
-    this._page.on('request', this._onRequest.bind(this))
+    }
    this._page.on('response', this._onResponse.bind(this))
    return true
  }
@ -197,6 +209,13 @@ export class ChatGPTAPIBrowser {
    } else if (url.endsWith('api/auth/session')) {
      if (status === 403) {
        await this.handle403Error()
      } else {
        const session: types.SessionResult = body
        console.log('ACCESS TOKEN', session.accessToken)
        if (session?.accessToken) {
          this._accessToken = session.accessToken
        }
      }
    }
  }
@ -209,7 +228,9 @@ export class ChatGPTAPIBrowser {
        waitUntil: 'networkidle2',
        timeout: 2 * 60 * 1000 // 2 minutes
      })
-      await minimizePage(this._page)
+      if (this._minimize) {
        await minimizePage(this._page)
      }
    } catch (err) {
      console.error(
        `ChatGPT "${this._email}" error refreshing session`,
@ -228,121 +249,181 @@ export class ChatGPTAPIBrowser {
    }
  }
-  async getLastMessage(): Promise<string | null> {
+  // async getLastMessage(): Promise<string | null> {
-    const messages = await this.getMessages()
+  //   const messages = await this.getMessages()
-    if (messages) {
+  //   if (messages) {
-      return messages[messages.length - 1]
+  //     return messages[messages.length - 1]
-    } else {
+  //   } else {
-      return null
+  //     return null
-    }
+  //   }
-  }
+  // }
-  async getPrompts(): Promise<string[]> {
+  // async getPrompts(): Promise<string[]> {
-    // Get all prompts
+  //   // Get all prompts
-    const messages = await this._page.$$(
+  //   const messages = await this._page.$$(
-      '.text-base:has(.whitespace-pre-wrap):not(:has(button:nth-child(2))) .whitespace-pre-wrap'
+  //     '.text-base:has(.whitespace-pre-wrap):not(:has(button:nth-child(2))) .whitespace-pre-wrap'
-    )
+  //   )
-    // Prompts are always plaintext
+  //   // Prompts are always plaintext
-    return Promise.all(messages.map((a) => a.evaluate((el) => el.textContent)))
+  //   return Promise.all(messages.map((a) => a.evaluate((el) => el.textContent)))
-  }
+  // }
-  async getMessages(): Promise<string[]> {
+  // async getMessages(): Promise<string[]> {
-    // Get all complete messages
+  //   // Get all complete messages
-    // (in-progress messages that are being streamed back don't contain action buttons)
+  //   // (in-progress messages that are being streamed back don't contain action buttons)
-    const messages = await this._page.$$(
+  //   const messages = await this._page.$$(
-      '.text-base:has(.whitespace-pre-wrap):has(button:nth-child(2)) .whitespace-pre-wrap'
+  //     '.text-base:has(.whitespace-pre-wrap):has(button:nth-child(2)) .whitespace-pre-wrap'
-    )
+  //   )
-    if (this._markdown) {
+  //   if (this._markdown) {
-      const htmlMessages = await Promise.all(
+  //     const htmlMessages = await Promise.all(
-        messages.map((a) => a.evaluate((el) => el.innerHTML))
+  //       messages.map((a) => a.evaluate((el) => el.innerHTML))
-      )
+  //     )
-      const markdownMessages = htmlMessages.map((messageHtml) => {
+  //     const markdownMessages = htmlMessages.map((messageHtml) => {
-        // parse markdown from message HTML
+  //       // parse markdown from message HTML
-        messageHtml = messageHtml
+  //       messageHtml = messageHtml
-          .replaceAll('Copy code</button>', '</button>')
+  //         .replaceAll('Copy code</button>', '</button>')
-          .replace(/Copy code\s*<\/button>/gim, '</button>')
+  //         .replace(/Copy code\s*<\/button>/gim, '</button>')
-        return html2md(messageHtml, {
+  //       return html2md(messageHtml, {
-          ignoreTags: [
+  //         ignoreTags: [
-            'button',
+  //           'button',
-            'svg',
+  //           'svg',
-            'style',
+  //           'style',
-            'form',
+  //           'form',
-            'noscript',
+  //           'noscript',
-            'script',
+  //           'script',
-            'meta',
+  //           'meta',
-            'head'
+  //           'head'
-          ],
+  //         ],
-          skipTags: ['button', 'svg']
+  //         skipTags: ['button', 'svg']
-        })
+  //       })
-      })
+  //     })
-      return markdownMessages
+  //     return markdownMessages
-    } else {
+  //   } else {
-      // plaintext
+  //     // plaintext
-      const plaintextMessages = await Promise.all(
+  //     const plaintextMessages = await Promise.all(
-        messages.map((a) => a.evaluate((el) => el.textContent))
+  //       messages.map((a) => a.evaluate((el) => el.textContent))
-      )
+  //     )
-      return plaintextMessages
+  //     return plaintextMessages
-    }
+  //   }
-  }
+  // }
  async sendMessage(
    message: string,
-    opts: {
+    opts: types.SendMessageOptions = {}
      timeoutMs?: number
    } = {}
  ): Promise<string> {
-    const { timeoutMs } = opts
+    const {
      conversationId,
      parentMessageId = uuidv4(),
      messageId = uuidv4(),
      action = 'next',
      // TODO
      // timeoutMs,
      // onProgress,
      onConversationResponse
    } = opts
    const inputBox = await this._getInputBox()
-    if (!inputBox) throw new Error('not signed in')
+    if (!inputBox || !this._accessToken) {
-
+      const error = new types.ChatGPTError('Not signed in')
-    const lastMessage = await this.getLastMessage()
+      error.statusCode = 401
-
+      throw error
    await inputBox.focus()
    const paragraphs = message.split('\n')
    for (let i = 0; i < paragraphs.length; i++) {
      await inputBox.type(paragraphs[i], { delay: 0 })
      if (i < paragraphs.length - 1) {
        await this._page.keyboard.down('Shift')
        await inputBox.press('Enter')
        await this._page.keyboard.up('Shift')
      } else {
        await inputBox.press('Enter')
      }
    }
-    const responseP = new Promise<string>(async (resolve, reject) => {
+    const url = `https://chat.openai.com/backend-api/conversation`
-      try {
+    const body: types.ConversationJSONBody = {
-        do {
+      action,
-          await delay(1000)
+      messages: [
-
+        {
-          // TODO: this logic needs some work because we can have repeat messages...
+          id: messageId,
-          const newLastMessage = await this.getLastMessage()
+          role: 'user',
-          if (
+          content: {
-            newLastMessage &&
+            content_type: 'text',
-            lastMessage?.toLowerCase() !== newLastMessage?.toLowerCase()
+            parts: [message]
          ) {
            return resolve(newLastMessage)
          }
-        } while (true)
+        }
-      } catch (err) {
+      ],
-        return reject(err)
+      model: 'text-davinci-002-render',
-      }
+      parent_message_id: parentMessageId
    })
    if (timeoutMs) {
      return pTimeout(responseP, {
        milliseconds: timeoutMs
      })
    } else {
      return responseP
    }
    if (conversationId) {
      body.conversation_id = conversationId
    }
    // console.log('>>> EVALUATE', url, this._accessToken, body)
    const result = await this._page.evaluate(
      browserPostEventStream,
      url,
      this._accessToken,
      body
    )
    // console.log('<<< EVALUATE', result)
    if (result.error) {
      const error = new types.ChatGPTError(result.error.message)
      error.statusCode = result.error.statusCode
      error.statusText = result.error.statusText
      if (error.statusCode === 403) {
        await this.handle403Error()
      }
      throw error
    }
    // TODO: support sending partial response events
    if (onConversationResponse) {
      onConversationResponse(result.conversationResponse)
    }
    return result.response
    // const lastMessage = await this.getLastMessage()
    // await inputBox.focus()
    // const paragraphs = message.split('\n')
    // for (let i = 0; i < paragraphs.length; i++) {
    //   await inputBox.type(paragraphs[i], { delay: 0 })
    //   if (i < paragraphs.length - 1) {
    //     await this._page.keyboard.down('Shift')
    //     await inputBox.press('Enter')
    //     await this._page.keyboard.up('Shift')
    //   } else {
    //     await inputBox.press('Enter')
    //   }
    // }
    // const responseP = new Promise<string>(async (resolve, reject) => {
    //   try {
    //     do {
    //       await delay(1000)
    //       // TODO: this logic needs some work because we can have repeat messages...
    //       const newLastMessage = await this.getLastMessage()
    //       if (
    //         newLastMessage &&
    //         lastMessage?.toLowerCase() !== newLastMessage?.toLowerCase()
    //       ) {
    //         return resolve(newLastMessage)
    //       }
    //     } while (true)
    //   } catch (err) {
    //     return reject(err)
    //   }
    // })
    // if (timeoutMs) {
    //   return pTimeout(responseP, {
    //     milliseconds: timeoutMs
    //   })
    // } else {
    //   return responseP
    // }
  }
  async resetThread() {
--- a/src/openai-auth.ts
+++ b/src/openai-auth.ts
@ -127,7 +127,7 @@ export async function getOpenAIAuth({
        }
        await page.click('button[type="submit"]')
-        await page.waitForSelector('#password')
+        await page.waitForSelector('#password', { timeout: timeoutMs })
        await page.type('#password', password, { delay: 10 })
        submitP = () => page.click('button[type="submit"]')
      }
--- a/src/types.ts
+++ b/src/types.ts
@ -297,3 +297,19 @@ export class ChatGPTError extends Error {
  response?: Response
  originalError?: Error
 }
 export type ChatError = {
  error: { message: string; statusCode?: number; statusText?: string }
  response: null
  conversationId?: string
  messageId?: string
  conversationResponse?: ConversationResponseEvent
 }
 export type ChatResponse = {
  error: null
  response: string
  conversationId: string
  messageId: string
  conversationResponse?: ConversationResponseEvent
 }
--- a/src/utils.ts
+++ b/src/utils.ts
@ -1,7 +1,13 @@
 import type {
  EventSourceParseCallback,
  EventSourceParser
 } from 'eventsource-parser'
 import type { Page } from 'puppeteer'
 import { remark } from 'remark'
 import stripMarkdown from 'strip-markdown'
 import * as types from './types'
 export function markdownToText(markdown?: string): string {
  return remark()
    .use(stripMarkdown)
@ -44,7 +50,10 @@ export function isRelevantRequest(url: string): boolean {
    return false
  }
-  if (pathname.startsWith('/_next')) {
+  if (
    !pathname.startsWith('/backend-api/') &&
    !pathname.startsWith('/api/auth/session')
  ) {
    return false
  }
@ -54,3 +63,307 @@ export function isRelevantRequest(url: string): boolean {
  return true
 }
 /**
 * This function is injected into the ChatGPT webapp page using puppeteer. It
 * has to be fully self-contained, so we copied a few third-party sources and
 * included them in here.
 */
 export async function browserPostEventStream(
  url: string,
  accessToken: string,
  body: types.ConversationJSONBody
 ): Promise<types.ChatError | types.ChatResponse> {
  const BOM = [239, 187, 191]
  // Workaround for https://github.com/esbuild-kit/tsx/issues/113
  globalThis.__name = () => undefined
  let conversationId: string = body?.conversation_id
  let messageId: string = body?.messages?.[0]?.id
  let response = ''
  try {
    console.log('browserPostEventStream', url, accessToken, body)
    const res = await fetch(url, {
      method: 'POST',
      body: JSON.stringify(body),
      headers: {
        accept: 'text/event-stream',
        'x-openai-assistant-app-id': '',
        authorization: `Bearer ${accessToken}`,
        'content-type': 'application/json'
      }
    })
    console.log('EVENT', res)
    if (!res.ok) {
      return {
        error: {
          message: `ChatGPTAPI error ${res.status || res.statusText}`,
          statusCode: res.status,
          statusText: res.statusText
        },
        response: null,
        conversationId,
        messageId
      }
    }
    return await new Promise<types.ChatResponse>(async (resolve, reject) => {
      function onMessage(data: string) {
        if (data === '[DONE]') {
          return resolve({
            error: null,
            response,
            conversationId,
            messageId
          })
        }
        try {
          const parsedData: types.ConversationResponseEvent = JSON.parse(data)
          if (parsedData.conversation_id) {
            conversationId = parsedData.conversation_id
          }
          if (parsedData.message?.id) {
            messageId = parsedData.message.id
          }
          const partialResponse = parsedData.message?.content?.parts?.[0]
          if (partialResponse) {
            response = partialResponse
          }
        } catch (err) {
          console.warn('fetchSSE onMessage unexpected error', err)
          reject(err)
        }
      }
      const parser = createParser((event) => {
        if (event.type === 'event') {
          onMessage(event.data)
        }
      })
      for await (const chunk of streamAsyncIterable(res.body)) {
        const str = new TextDecoder().decode(chunk)
        parser.feed(str)
      }
    })
  } catch (err) {
    const errMessageL = err.toString().toLowerCase()
    if (
      response &&
      (errMessageL === 'error: typeerror: terminated' ||
        errMessageL === 'typeerror: terminated')
    ) {
      // OpenAI sometimes forcefully terminates the socket from their end before
      // the HTTP request has resolved cleanly. In my testing, these cases tend to
      // happen when OpenAI has already send the last `response`, so we can ignore
      // the `fetch` error in this case.
      return {
        error: null,
        response,
        conversationId,
        messageId
      }
    }
    return {
      error: {
        message: err.toString(),
        statusCode: err.statusCode || err.status || err.response?.statusCode,
        statusText: err.statusText || err.response?.statusText
      },
      response: null,
      conversationId,
      messageId
    }
  }
  async function* streamAsyncIterable<T>(stream: ReadableStream<T>) {
    const reader = stream.getReader()
    try {
      while (true) {
        const { done, value } = await reader.read()
        if (done) {
          return
        }
        yield value
      }
    } finally {
      reader.releaseLock()
    }
  }
  function createParser(onParse: EventSourceParseCallback): EventSourceParser {
    // Processing state
    let isFirstChunk: boolean
    let buffer: string
    let startingPosition: number
    let startingFieldLength: number
    // Event state
    let eventId: string | undefined
    let eventName: string | undefined
    let data: string
    reset()
    return { feed, reset }
    function reset(): void {
      isFirstChunk = true
      buffer = ''
      startingPosition = 0
      startingFieldLength = -1
      eventId = undefined
      eventName = undefined
      data = ''
    }
    function feed(chunk: string): void {
      buffer = buffer ? buffer + chunk : chunk
      // Strip any UTF8 byte order mark (BOM) at the start of the stream.
      // Note that we do not strip any non - UTF8 BOM, as eventsource streams are
      // always decoded as UTF8 as per the specification.
      if (isFirstChunk && hasBom(buffer)) {
        buffer = buffer.slice(BOM.length)
      }
      isFirstChunk = false
      // Set up chunk-specific processing state
      const length = buffer.length
      let position = 0
      let discardTrailingNewline = false
      // Read the current buffer byte by byte
      while (position < length) {
        // EventSource allows for carriage return + line feed, which means we
        // need to ignore a linefeed character if the previous character was a
        // carriage return
        // @todo refactor to reduce nesting, consider checking previous byte?
        // @todo but consider multiple chunks etc
        if (discardTrailingNewline) {
          if (buffer[position] === '\n') {
            ++position
          }
          discardTrailingNewline = false
        }
        let lineLength = -1
        let fieldLength = startingFieldLength
        let character: string
        for (
          let index = startingPosition;
          lineLength < 0 && index < length;
          ++index
        ) {
          character = buffer[index]
          if (character === ':' && fieldLength < 0) {
            fieldLength = index - position
          } else if (character === '\r') {
            discardTrailingNewline = true
            lineLength = index - position
          } else if (character === '\n') {
            lineLength = index - position
          }
        }
        if (lineLength < 0) {
          startingPosition = length - position
          startingFieldLength = fieldLength
          break
        } else {
          startingPosition = 0
          startingFieldLength = -1
        }
        parseEventStreamLine(buffer, position, fieldLength, lineLength)
        position += lineLength + 1
      }
      if (position === length) {
        // If we consumed the entire buffer to read the event, reset the buffer
        buffer = ''
      } else if (position > 0) {
        // If there are bytes left to process, set the buffer to the unprocessed
        // portion of the buffer only
        buffer = buffer.slice(position)
      }
    }
    function parseEventStreamLine(
      lineBuffer: string,
      index: number,
      fieldLength: number,
      lineLength: number
    ) {
      if (lineLength === 0) {
        // We reached the last line of this event
        if (data.length > 0) {
          onParse({
            type: 'event',
            id: eventId,
            event: eventName || undefined,
            data: data.slice(0, -1) // remove trailing newline
          })
          data = ''
          eventId = undefined
        }
        eventName = undefined
        return
      }
      const noValue = fieldLength < 0
      const field = lineBuffer.slice(
        index,
        index + (noValue ? lineLength : fieldLength)
      )
      let step = 0
      if (noValue) {
        step = lineLength
      } else if (lineBuffer[index + fieldLength + 1] === ' ') {
        step = fieldLength + 2
      } else {
        step = fieldLength + 1
      }
      const position = index + step
      const valueLength = lineLength - step
      const value = lineBuffer
        .slice(position, position + valueLength)
        .toString()
      if (field === 'data') {
        data += value ? `${value}\n` : '\n'
      } else if (field === 'event') {
        eventName = value
      } else if (field === 'id' && !value.includes('\u0000')) {
        eventId = value
      } else if (field === 'retry') {
        const retry = parseInt(value, 10)
        if (!Number.isNaN(retry)) {
          onParse({ type: 'reconnect-interval', value: retry })
        }
      }
    }
  }
  function hasBom(buffer: string) {
    return BOM.every(
      (charCode: number, index: number) => buffer.charCodeAt(index) === charCode
    )
  }
 }