feat: support GPT-4V images

pull/629/head
Jake 2023-11-19 10:40:25 +08:00
parent 2cb9fda958
commit 2d1d8713ee
No known key found in the database for this signature
GPG key ID: BB676DBDCF031D7C
3 changed files with 147 additions and 10 deletions

View file

@@ -139,6 +139,7 @@ export class ChatGPTAPI {
opts: types.SendMessageOptions = {}
): Promise<types.ChatMessage> {
const {
image,
parentMessageId,
messageId = uuidv4(),
timeoutMs,
@@ -161,7 +162,8 @@ export class ChatGPTAPI {
id: messageId,
conversationId,
parentMessageId,
text
text,
image
}
const latestQuestion = message
@@ -358,12 +360,30 @@ export class ChatGPTAPI {
this._apiOrg = apiOrg
}
protected getContentString(
content: types.ChatCompletionRequestMessageContent
) {
if (Array.isArray(content)) {
return content
.map((item) => {
if (item.type === 'text') {
return item.text
} else if (item.type === 'image_url') {
return item.image_url.url
}
})
.join('\n')
}
return content
}
async buildMessages(text: string, opts: types.SendMessageOptions) {
return this._buildMessages(text, opts)
}
protected async _buildMessages(text: string, opts: types.SendMessageOptions) {
const { systemMessage = this._systemMessage } = opts
const { systemMessage = this._systemMessage, image } = opts
let { parentMessageId } = opts
const userLabel = USER_LABEL_DEFAULT
@@ -379,12 +399,32 @@ export class ChatGPTAPI {
})
}
let userContent: types.ChatCompletionRequestMessageContent = text
let imageTokens = 0
if (image?.url) {
userContent = [
{
type: 'text',
text: text
},
{
type: 'image_url',
image_url: {
url: image.url,
detail: 'high'
}
}
]
imageTokens = tokenizer.getTokensImage(image.width, image.height, 'high')
}
const systemMessageOffset = messages.length
let nextMessages = text
? messages.concat([
{
role: 'user',
content: text,
content: userContent,
name: opts.name
}
])
@@ -399,15 +439,19 @@ export class ChatGPTAPI {
return prompt
// return prompt.concat([`Instructions:\n${message.content}`])
case 'user':
return prompt.concat([`${userLabel}:\n${message.content}`])
return prompt.concat([
`${userLabel}:\n${this.getContentString(message.content)}`
])
default:
return prompt.concat([`${assistantLabel}:\n${message.content}`])
return prompt.concat([
`${assistantLabel}:\n${this.getContentString(message.content)}`
])
}
}, [] as string[])
.join('\n\n')
const nextNumTokensEstimate = await this._getTokenCount(prompt)
const isValidPrompt = nextNumTokensEstimate <= maxNumTokens
const isValidPrompt = nextNumTokensEstimate + imageTokens <= maxNumTokens
if (prompt && !isValidPrompt) {
break
@@ -431,10 +475,35 @@ export class ChatGPTAPI {
const parentMessageRole = parentMessage.role || 'user'
let content: types.ChatCompletionRequestMessageContent =
parentMessage.text
if (parentMessage.image?.url) {
content = [
{
type: 'text',
text: parentMessage.text
},
{
type: 'image_url',
image_url: {
url: parentMessage.image.url,
detail: 'high'
}
}
]
imageTokens += tokenizer.getTokensImage(
parentMessage.image.width,
parentMessage.image.height,
'high'
)
}
nextMessages = nextMessages.slice(0, systemMessageOffset).concat([
{
role: parentMessageRole,
content: parentMessage.text,
content,
name: parentMessage.name
},
...nextMessages.slice(systemMessageOffset)
@@ -447,10 +516,13 @@ export class ChatGPTAPI {
// for the response.
const maxTokens = Math.max(
1,
Math.min(this._maxModelTokens - numTokens, this._maxResponseTokens)
Math.min(
this._maxModelTokens - numTokens - imageTokens,
this._maxResponseTokens
)
)
return { messages, maxTokens, numTokens }
return { messages, maxTokens, numTokens, imageTokens }
}
protected async _getTokenCount(text: string) {
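For context, a minimal usage sketch of the new image option (not part of the commit; the API key, model name, image URL, and dimensions below are placeholders, and a vision-capable model is assumed):

import { ChatGPTAPI } from 'chatgpt'

// Placeholder key and model; gpt-4-vision-preview is assumed to accept image content.
const api = new ChatGPTAPI({
  apiKey: process.env.OPENAI_API_KEY,
  completionParams: { model: 'gpt-4-vision-preview' }
})

const res = await api.sendMessage('What is shown in this image?', {
  // width/height are passed to tokenizer.getTokensImage for the token budget
  image: {
    url: 'https://example.com/photo.jpg',
    width: 1024,
    height: 768
  }
})
console.log(res.text)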

View file

@@ -1,8 +1,47 @@
import { get_encoding } from '@dqbd/tiktoken'
import { ChatCompletionRequestMessageContentImageDetail } from './types'
// TODO: make this configurable
const tokenizer = get_encoding('cl100k_base')
export function encode(input: string): Uint32Array {
return tokenizer.encode(input)
}
export function getTokensImage(
width: number,
height: number,
detail: ChatCompletionRequestMessageContentImageDetail
) {
if (detail === 'low') {
return 85
}
// https://openai.com/pricing
// https://platform.openai.com/docs/guides/vision
const maxLength = Math.max(width, height)
let percentage = 0
if (maxLength >= 2048) {
percentage = 2048 / maxLength
width = Math.ceil(width * percentage)
height = Math.ceil(height * percentage)
}
const minLength = Math.min(width, height)
// per the vision guide above, the shortest side is scaled down to 768px
if (minLength > 768) {
percentage = 768 / minLength
width = Math.ceil(width * percentage)
height = Math.ceil(height * percentage)
}
// The tile calculation below may be inaccurate; it is unclear exactly how the resize is computed
const h = Math.ceil(height / 512)
const w = Math.ceil(width / 512)
const n = w * h
const total = 85 + 170 * n
return total
}
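For reference, a worked trace of getTokensImage as written (not part of the commit), using the 1024x1024 high-detail case that OpenAI's pricing examples also put at 765 tokens:

// getTokensImage(1024, 1024, 'high')
//   longest side 1024 < 2048            -> no fit-to-2048 downscale
//   shortest side 1024 -> rescaled by 768/1024 to 768 x 768
//   tiles: Math.ceil(768 / 512) * Math.ceil(768 / 512) = 2 * 2 = 4
//   total: 85 + 170 * 4 = 765 tokens
// getTokensImage(width, height, 'low') always returns the flat 85.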

View file

@@ -34,9 +34,16 @@ export type ChatGPTAPIOptions = {
fetch?: FetchFn
}
export type SendMessageImage = {
url: string
width: number
height: number
}
export type SendMessageOptions = {
/** The name of a user in a multi-user chat. */
name?: string
image?: SendMessageImage
parentMessageId?: string
conversationId?: string
messageId?: string
@@ -50,6 +57,24 @@ export type SendMessageOptions = {
>
}
export type ChatCompletionRequestMessageContentImageDetail = 'low' | 'high'
export type ChatCompletionRequestMessageContent =
| string
| [
{
type: 'text'
text: string
},
{
type: 'image_url'
image_url: {
url: string
detail: ChatCompletionRequestMessageContentImageDetail
}
}
]
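As an illustration (not from the commit; the prompt text and URL are placeholders), a value of this union is either a plain string or the [text, image_url] pair:

const textOnly: ChatCompletionRequestMessageContent = 'Hello'

const withImage: ChatCompletionRequestMessageContent = [
  { type: 'text', text: 'Describe this picture.' },
  {
    type: 'image_url',
    image_url: { url: 'https://example.com/cat.png', detail: 'high' }
  }
]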
export type MessageActionType = 'next' | 'variant'
export type SendMessageBrowserOptions = {
@@ -65,6 +90,7 @@ export type SendMessageBrowserOptions = {
export interface ChatMessage {
id: string
text: string
image?: SendMessageImage
role: Role
name?: string
delta?: string
@@ -224,7 +250,7 @@ export namespace openai {
* @type {string}
* @memberof ChatCompletionRequestMessage
*/
content: string
content: ChatCompletionRequestMessageContent
/**
* The name of the user in a multi-user chat
* @type {string}