From 05f6c8f70fcc098026f72831a39cc6de007f6259 Mon Sep 17 00:00:00 2001
From: Travis Fischer
Date: Wed, 7 Jun 2023 12:29:06 -0700
Subject: [PATCH] =?UTF-8?q?=E2=9B=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 legacy/src/llms/llm.ts | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/legacy/src/llms/llm.ts b/legacy/src/llms/llm.ts
index 40046122..30215a12 100644
--- a/legacy/src/llms/llm.ts
+++ b/legacy/src/llms/llm.ts
@@ -30,7 +30,7 @@ export abstract class BaseLLM<
   protected _model: string
   protected _modelParams: TModelParams | undefined
   protected _examples: types.LLMExample[] | undefined
-  protected _tokenizer?: Tokenizer | null
+  protected _tokenizerP?: Promise<Tokenizer | null>
 
   constructor(
     options: SetRequired<
@@ -95,23 +95,22 @@
   }
 
   async getNumTokens(text: string): Promise<number> {
-    if (this._tokenizer === undefined) {
+    if (!this._tokenizerP) {
       const model = this._model || 'gpt2'
 
-      try {
-        this._tokenizer = await getTokenizerForModel(model)
-      } catch (err) {
-        this._tokenizer = null
-
+      this._tokenizerP = getTokenizerForModel(model).catch((err) => {
         console.warn(
          `Failed to initialize tokenizer for model "${model}", falling back to approximate count`,
           err
         )
-      }
+
+        return null
+      })
     }
 
-    if (this._tokenizer) {
-      return this._tokenizer.encode(text).length
+    const tokenizer = await this._tokenizerP
+    if (tokenizer) {
+      return tokenizer.encode(text).length
     }
 
     // fallback to approximate calculation if tokenizer is not available
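
Note (not part of the patch): the change above replaces a nullable _tokenizer field guarded by try/catch with a single memoized Promise<Tokenizer | null>, so concurrent getNumTokens() calls share one tokenizer lookup and a failed lookup is cached as null instead of being retried or rethrown. Below is a minimal, self-contained TypeScript sketch of that pattern; the Tokenizer interface, the stubbed getTokenizerForModel, and the 4-characters-per-token fallback are placeholder assumptions for illustration, not the repository's actual implementations (the real fallback code lies past the end of the hunk shown above).

// Minimal sketch of the promise-memoization pattern introduced by the patch.
// Everything here is a stand-in for illustration only.

interface Tokenizer {
  encode(text: string): number[]
}

// Placeholder loader: pretends only "gpt2" has a tokenizer available.
async function getTokenizerForModel(model: string): Promise<Tokenizer> {
  if (model !== 'gpt2') {
    throw new Error(`no tokenizer available for model "${model}"`)
  }

  // Crude whitespace "tokenizer", purely for demonstration.
  return {
    encode: (text) => text.split(/\s+/).filter(Boolean).map((_, i) => i)
  }
}

class TokenCounter {
  protected _model: string
  protected _tokenizerP?: Promise<Tokenizer | null>

  constructor(model = 'gpt2') {
    this._model = model
  }

  async getNumTokens(text: string): Promise<number> {
    if (!this._tokenizerP) {
      // Cache the promise itself, not its resolved value, so overlapping
      // callers await the same in-flight initialization exactly once.
      this._tokenizerP = getTokenizerForModel(this._model).catch((err) => {
        console.warn(
          `Failed to initialize tokenizer for model "${this._model}", falling back to approximate count`,
          err
        )

        return null
      })
    }

    const tokenizer = await this._tokenizerP
    if (tokenizer) {
      return tokenizer.encode(text).length
    }

    // Hypothetical fallback heuristic (~4 characters per token); the real
    // fallback in llm.ts is outside the hunk shown above.
    return Math.ceil(text.length / 4)
  }
}

// Usage: the whitespace stub resolves, so this logs 2.
new TokenCounter('gpt2').getNumTokens('hello world').then(console.log)

// An unknown model warns once, caches null, and logs 3 via the fallback.
new TokenCounter('some-unknown-model').getNumTokens('hello world').then(console.log)

Because the rejected lookup resolves to null inside the cached promise, a failure is memoized the same way the old code memoized _tokenizer = null, but the await now happens in one place instead of inside the initialization branch.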