Bug fixes and optimizations

main
pluja 2023-04-29 18:34:20 +02:00
rodzic 62d1aef168
commit edf2131a2e
2 zmienionych plików z 67 dodań i 45 usunięć

Wyświetl plik

@ -1,3 +1,6 @@
# # # # # # # # # #
# OpenAI / ChatGPT #
# # # # # # # # # #
OPENAI_API_KEY=your-openai-api-key OPENAI_API_KEY=your-openai-api-key
OPENAI_MODEL=gpt-3.5-turbo OPENAI_MODEL=gpt-3.5-turbo
@ -8,11 +11,21 @@ CHATGPT_TEMPERATURE=1.0
# Use Whisper transcript from voice message with ChatGPT # Use Whisper transcript from voice message with ChatGPT
WHISPER_TO_GPT=1 WHISPER_TO_GPT=1
# TTS Options # # # # # # # #
# TTS Options #
# # # # # # # #
ENABLE_TTS=1 ENABLE_TTS=1
# If USE_TTS=1, you can set the following options
VOICE_LANGUAGE_LIST=en,es,fr,it,pt,ca
DEFAULT_VOICE_LANGUAGE=en
# If ENABLE_TTS=1, you can set the following options
# Remove any language whose voice you do not want to download
VOICE_LANGUAGE_LIST=en,es,fr,it,pt,ca,no,nl,de
DEFAULT_VOICE_LANGUAGE=en
# Do not change this line
LANGUAGES_H_SR=en,fr,nl,no
# # # # # # # # # #
# Telegram Options #
# # # # # # # # # #
BOT_TOKEN=your-telegram-bot-token BOT_TOKEN=your-telegram-bot-token
BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs

91
main.py
Wyświetl plik

@ -1,10 +1,10 @@
import asyncio
import logging import logging
import os import os
import tempfile import wave
from functools import wraps from functools import wraps
from io import BytesIO from io import BytesIO
from typing import Tuple
import subprocess
import openai import openai
from aiogram import Bot, Dispatcher, types from aiogram import Bot, Dispatcher, types
@ -12,8 +12,8 @@ from aiogram.contrib.middlewares.logging import LoggingMiddleware
from aiogram.types import InlineKeyboardButton, InlineKeyboardMarkup, ParseMode from aiogram.types import InlineKeyboardButton, InlineKeyboardMarkup, ParseMode
from aiogram.utils import executor from aiogram.utils import executor
from dotenv import load_dotenv from dotenv import load_dotenv
from pydub import AudioSegment
from langdetect import detect from langdetect import detect
from pydub import AudioSegment
import database import database
@ -46,6 +46,7 @@ WHISPER_TO_GPT = bool(int(os.environ.get("WHISPER_TO_GPT")))
ENABLE_TTS = bool(int(os.environ.get("ENABLE_TTS"))) ENABLE_TTS = bool(int(os.environ.get("ENABLE_TTS")))
DEFAULT_VOICE_LANGUAGE = os.environ.get("DEFAULT_VOICE_LANGUAGE") DEFAULT_VOICE_LANGUAGE = os.environ.get("DEFAULT_VOICE_LANGUAGE")
VOICE_LANGUAGE_LIST = os.environ.get("VOICE_LANGUAGE_LIST") VOICE_LANGUAGE_LIST = os.environ.get("VOICE_LANGUAGE_LIST")
LANGUAGES_22050 = os.environ.get("LANGUAGES_H_SR").split(",")
MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT")) MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT"))
openai.api_key = os.environ.get("OPENAI_API_KEY") openai.api_key = os.environ.get("OPENAI_API_KEY")
@ -107,7 +108,15 @@ def generate_settings_markup(chat_id: str) -> InlineKeyboardMarkup:
return InlineKeyboardMarkup(inline_keyboard=keyboard) return InlineKeyboardMarkup(inline_keyboard=keyboard)
async def text_to_voice(text: str, language: str = None) -> BytesIO: async def send_voice_message(chat_id, assistant_message):
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
audio_data, _ = await text_to_voice(assistant_message)
audio_data.seek(0) # Reset the buffer's position to the beginning
await bot.send_voice(chat_id, audio_data)
async def text_to_voice(text: str, language: str = None) -> Tuple[BytesIO, str]:
binary_path = "./piper/piper" binary_path = "./piper/piper"
if language is None: if language is None:
@ -115,34 +124,30 @@ async def text_to_voice(text: str, language: str = None) -> BytesIO:
model_path = f"./piper/voices/{language}.onnx" model_path = f"./piper/voices/{language}.onnx"
# Generate a unique temporary filename with '.ogg' extension text = text.replace("\n", ". ")
with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tmp:
tmp_filename = tmp.name
# Run the binary with the escaped text as input and the temp file as output cmd = [binary_path, "--model", model_path, "--output_raw"]
with open(tmp_filename, "wb") as tmp_file: proc = await asyncio.create_subprocess_exec(
process = subprocess.Popen( *cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE
[binary_path, "--model", model_path, "--output_file", "-"], )
stdin=subprocess.PIPE,
stdout=tmp_file,
stderr=subprocess.PIPE,
text=True,
encoding="utf8",
)
# Remove all newlines from the text so that the text is read as a single sentence
text = text.replace("\n", ". ")
process.communicate(input=text)
# Open the file in binary mode and read its content into BytesIO object stdout, _ = await proc.communicate(input=text.encode("utf-8"))
with open(tmp_filename, "rb") as file:
bytes_io = BytesIO(file.read())
# Delete the temporary file # Create a new BytesIO object to store the WAV file.
os.remove(tmp_filename) wav_file = BytesIO()
with wave.open(wav_file, "wb") as wf:
# Assuming 1 channel and 16 bits per sample; the sample rate is 22050 Hz for languages in LANGUAGES_22050 and 16000 Hz otherwise.
wf.setnchannels(1)
wf.setsampwidth(2)
wf.setframerate(22050 if language in LANGUAGES_22050 else 16000)
wf.writeframes(stdout)
# Return the BytesIO object wav_file.seek(0)
return bytes_io
audio = AudioSegment.from_file(wav_file, format="wav")
audio = audio.export(format="ogg", codec="libopus", parameters=["-vbr", "on"])
return BytesIO(audio.read()), "ogg"
def restricted(func): def restricted(func):
@ -254,7 +259,7 @@ async def usage(message: types.Message) -> None:
- Generated {user_usage["dalle"]} images with DALL-E. - Generated {user_usage["dalle"]} images with DALL-E.
- Transcribed {round(float(user_usage["whisper"]) / 60.0, 2)}min with Whisper. - Transcribed {round(float(user_usage["whisper"]) / 60.0, 2)}min with Whisper.
Total spent: ${user_spent} ({user_percentage:.2f}% of total) User total: ${user_spent} ({user_percentage:.2f}% of total)
Total usage: Total usage:
- ChatGPT tokens: {total_usage["chatgpt"]} - ChatGPT tokens: {total_usage["chatgpt"]}
@ -297,9 +302,9 @@ async def attachment(message: types.Message):
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING) await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
transcript = {"text": ""} transcript = {"text": ""}
audioMessage = False audioMessage = False
# Handle media types
if message.voice: if message.voice:
user_data["usage"]["whisper"] += message.voice.duration user_data["usage"]["whisper"] += message.voice.duration
file_id = message.voice.file_id file_id = message.voice.file_id
@ -317,16 +322,19 @@ async def attachment(message: types.Message):
await message.reply("Can't handle such file. Reason: unknown.") await message.reply("Can't handle such file. Reason: unknown.")
return return
# Download file from Telegram
file = await bot.get_file(file_id) file = await bot.get_file(file_id)
user_id = message.chat.id user_id = message.chat.id
await file.download(f"{user_id}.{file_format}") await file.download(f"{user_id}.{file_format}")
# Convert audio to mp3 if needed, because OpenAI doesn't support ogg
if file_format == "ogg": if file_format == "ogg":
ogg_audio = AudioSegment.from_file(f"{user_id}.ogg", format="ogg") ogg_audio = AudioSegment.from_file(f"{user_id}.ogg", format="ogg")
ogg_audio.export(f"{user_id}.mp3", format="mp3") ogg_audio.export(f"{user_id}.mp3", format="mp3")
os.remove(f"{user_id}.ogg") os.remove(f"{user_id}.ogg")
file_format = "mp3" file_format = "mp3"
# Transcribe audio with OpenAI API
with open(f"{user_id}.{file_format}", "rb") as audio_file: with open(f"{user_id}.{file_format}", "rb") as audio_file:
try: try:
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING) await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
@ -336,25 +344,28 @@ async def attachment(message: types.Message):
await message.reply("Transcript failed.") await message.reply("Transcript failed.")
os.remove(f"{user_id}.{file_format}") os.remove(f"{user_id}.{file_format}")
return return
os.remove(f"{user_id}.{file_format}")
os.remove(f"{user_id}.{file_format}") # Handle empty transcript
if transcript["text"] == "": if transcript["text"] == "":
transcript["text"] = "[Silence]" transcript["text"] = "[Silence]"
# If whisper_to_chat is enabled, send transcript to ChatGPT and send its response along with the transcript
chatGPT_response = False chatGPT_response = False
if audioMessage and user_data["options"]["whisper_to_chat"]: if audioMessage and user_data["options"]["whisper_to_chat"]:
chatGPT_response, user_data = await messageGPT( chatGPT_response, user_data = await messageGPT(
transcript["text"], str(chat_id), message.from_user.full_name, user_data transcript["text"], str(chat_id), message.from_user.full_name, user_data
) )
transcript["text"] = "> " + transcript["text"] + "\n\n" + chatGPT_response transcript["text"] = ''.join(["> ", transcript["text"], "\n\n", chatGPT_response])
# Send transcript (and ChatGPT response if enabled)
await message.reply(transcript["text"]) await message.reply(transcript["text"])
# Send ChatGPT response as voice message with piper TTS if enabled
if user_data["options"]["assistant_voice_chat"] and chatGPT_response: if user_data["options"]["assistant_voice_chat"] and chatGPT_response:
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING) asyncio.create_task(send_voice_message(chat_id, chatGPT_response))
voice_data = await text_to_voice(chatGPT_response)
await message.reply_voice(voice_data) # Update user data
database.update_user(str(chat_id), user_data) database.update_user(str(chat_id), user_data)
@ -420,9 +431,7 @@ async def chat(message: types.Message):
await message.reply(assistant_message, parse_mode=ParseMode.MARKDOWN) await message.reply(assistant_message, parse_mode=ParseMode.MARKDOWN)
if user_data["options"]["assistant_voice_chat"]: if user_data["options"]["assistant_voice_chat"]:
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING) asyncio.create_task(send_voice_message(chat_id, assistant_message))
voice_data = await text_to_voice(assistant_message)
await message.reply_voice(voice_data)
if __name__ == "__main__": if __name__ == "__main__":