kopia lustrzana https://codeberg.org/pluja/openai-telegram-bot
Bug fixes and optimizations
rodzic
62d1aef168
commit
edf2131a2e
21
example.env
21
example.env
|
@ -1,3 +1,6 @@
|
|||
# # # # # # # # # #
|
||||
# OpenAI / ChatGPT #
|
||||
# # # # # # # # # #
|
||||
OPENAI_API_KEY=your-openai-api-key
|
||||
OPENAI_MODEL=gpt-3.5-turbo
|
||||
|
||||
|
@ -8,11 +11,21 @@ CHATGPT_TEMPERATURE=1.0
|
|||
# Use Whisper transcript from voice message with ChatGPT
|
||||
WHISPER_TO_GPT=1
|
||||
|
||||
# TTS Options
|
||||
# # # # # # # #
|
||||
# TTS Options #
|
||||
# # # # # # # #
|
||||
ENABLE_TTS=1
|
||||
# If USE_TTS=1, you can set the following options
|
||||
VOICE_LANGUAGE_LIST=en,es,fr,it,pt,ca
|
||||
DEFAULT_VOICE_LANGUAGE=en
|
||||
|
||||
# If ENABLE_TTS=1, you can set the following options
|
||||
# Remove any language you don't need so that its voice is not downloaded
|
||||
VOICE_LANGUAGE_LIST=en,es,fr,it,pt,ca,no,nl,de
|
||||
DEFAULT_VOICE_LANGUAGE=en
|
||||
|
||||
# Languages whose voices use the higher (22050 Hz) sample rate. Do not change this line
|
||||
LANGUAGES_H_SR=en,fr,nl,no
|
||||
|
||||
# # # # # # # # # #
|
||||
# Telegram Options #
|
||||
# # # # # # # # # #
|
||||
BOT_TOKEN=your-telegram-bot-token
|
||||
BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs
|
||||
|
|
91
main.py
91
main.py
|
@ -1,10 +1,10 @@
|
|||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import wave
|
||||
from functools import wraps
|
||||
from io import BytesIO
|
||||
|
||||
import subprocess
|
||||
from typing import Tuple
|
||||
|
||||
import openai
|
||||
from aiogram import Bot, Dispatcher, types
|
||||
|
@ -12,8 +12,8 @@ from aiogram.contrib.middlewares.logging import LoggingMiddleware
|
|||
from aiogram.types import InlineKeyboardButton, InlineKeyboardMarkup, ParseMode
|
||||
from aiogram.utils import executor
|
||||
from dotenv import load_dotenv
|
||||
from pydub import AudioSegment
|
||||
from langdetect import detect
|
||||
from pydub import AudioSegment
|
||||
|
||||
import database
|
||||
|
||||
|
@ -46,6 +46,7 @@ WHISPER_TO_GPT = bool(int(os.environ.get("WHISPER_TO_GPT")))
|
|||
ENABLE_TTS = bool(int(os.environ.get("ENABLE_TTS")))
|
||||
DEFAULT_VOICE_LANGUAGE = os.environ.get("DEFAULT_VOICE_LANGUAGE")
|
||||
VOICE_LANGUAGE_LIST = os.environ.get("VOICE_LANGUAGE_LIST")
|
||||
LANGUAGES_22050 = os.environ.get("LANGUAGES_H_SR").split(",")
|
||||
|
||||
MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT"))
|
||||
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||
|
@ -107,7 +108,15 @@ def generate_settings_markup(chat_id: str) -> InlineKeyboardMarkup:
|
|||
return InlineKeyboardMarkup(inline_keyboard=keyboard)
|
||||
|
||||
|
||||
async def text_to_voice(text: str, language: str = None) -> BytesIO:
|
||||
async def send_voice_message(chat_id, assistant_message):
|
||||
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
||||
audio_data, _ = await text_to_voice(assistant_message)
|
||||
audio_data.seek(0) # Reset the buffer's position to the beginning
|
||||
|
||||
await bot.send_voice(chat_id, audio_data)
|
||||
|
||||
|
||||
async def text_to_voice(text: str, language: str = None) -> Tuple[BytesIO, str]:
|
||||
binary_path = "./piper/piper"
|
||||
|
||||
if language is None:
|
||||
|
@ -115,34 +124,30 @@ async def text_to_voice(text: str, language: str = None) -> BytesIO:
|
|||
|
||||
model_path = f"./piper/voices/{language}.onnx"
|
||||
|
||||
# Generate a unique temporary filename with '.ogg' extension
|
||||
with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tmp:
|
||||
tmp_filename = tmp.name
|
||||
text = text.replace("\n", ". ")
|
||||
|
||||
# Run the binary with the escaped text as input and the temp file as output
|
||||
with open(tmp_filename, "wb") as tmp_file:
|
||||
process = subprocess.Popen(
|
||||
[binary_path, "--model", model_path, "--output_file", "-"],
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=tmp_file,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
encoding="utf8",
|
||||
)
|
||||
|
||||
# Remove all newlines from the text so that the text is read as a single sentence
|
||||
text = text.replace("\n", ". ")
|
||||
process.communicate(input=text)
|
||||
cmd = [binary_path, "--model", model_path, "--output_raw"]
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE
|
||||
)
|
||||
|
||||
# Open the file in binary mode and read its content into BytesIO object
|
||||
with open(tmp_filename, "rb") as file:
|
||||
bytes_io = BytesIO(file.read())
|
||||
stdout, _ = await proc.communicate(input=text.encode("utf-8"))
|
||||
|
||||
# Delete the temporary file
|
||||
os.remove(tmp_filename)
|
||||
# Create a new BytesIO object to store the WAV file.
|
||||
wav_file = BytesIO()
|
||||
with wave.open(wav_file, "wb") as wf:
|
||||
# Assuming 1 channel and 16 bits per sample; the sample rate is 22050 Hz for languages in LANGUAGES_22050 and 16000 Hz otherwise.
|
||||
wf.setnchannels(1)
|
||||
wf.setsampwidth(2)
|
||||
wf.setframerate(22050 if language in LANGUAGES_22050 else 16000)
|
||||
wf.writeframes(stdout)
|
||||
|
||||
# Return the BytesIO object
|
||||
return bytes_io
|
||||
wav_file.seek(0)
|
||||
|
||||
audio = AudioSegment.from_file(wav_file, format="wav")
|
||||
audio = audio.export(format="ogg", codec="libopus", parameters=["-vbr", "on"])
|
||||
|
||||
return BytesIO(audio.read()), "ogg"
|
||||
|
||||
|
||||
def restricted(func):
|
||||
|
@ -254,7 +259,7 @@ async def usage(message: types.Message) -> None:
|
|||
- Generated {user_usage["dalle"]} images with DALL-E.
|
||||
- Transcribed {round(float(user_usage["whisper"]) / 60.0, 2)}min with Whisper.
|
||||
|
||||
Total spent: ${user_spent} ({user_percentage:.2f}% of total)
|
||||
User total: ${user_spent} ({user_percentage:.2f}% of total)
|
||||
|
||||
Total usage:
|
||||
- ChatGPT tokens: {total_usage["chatgpt"]}
|
||||
|
@ -297,9 +302,9 @@ async def attachment(message: types.Message):
|
|||
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
||||
|
||||
transcript = {"text": ""}
|
||||
|
||||
audioMessage = False
|
||||
|
||||
# Handle media types
|
||||
if message.voice:
|
||||
user_data["usage"]["whisper"] += message.voice.duration
|
||||
file_id = message.voice.file_id
|
||||
|
@ -317,16 +322,19 @@ async def attachment(message: types.Message):
|
|||
await message.reply("Can't handle such file. Reason: unknown.")
|
||||
return
|
||||
|
||||
# Download file from Telegram
|
||||
file = await bot.get_file(file_id)
|
||||
user_id = message.chat.id
|
||||
await file.download(f"{user_id}.{file_format}")
|
||||
|
||||
# Convert audio to mp3 if needed, because OpenAI doesn't support ogg
|
||||
if file_format == "ogg":
|
||||
ogg_audio = AudioSegment.from_file(f"{user_id}.ogg", format="ogg")
|
||||
ogg_audio.export(f"{user_id}.mp3", format="mp3")
|
||||
os.remove(f"{user_id}.ogg")
|
||||
file_format = "mp3"
|
||||
|
||||
# Transcribe audio with OpenAI API
|
||||
with open(f"{user_id}.{file_format}", "rb") as audio_file:
|
||||
try:
|
||||
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
||||
|
@ -336,25 +344,28 @@ async def attachment(message: types.Message):
|
|||
await message.reply("Transcript failed.")
|
||||
os.remove(f"{user_id}.{file_format}")
|
||||
return
|
||||
os.remove(f"{user_id}.{file_format}")
|
||||
|
||||
os.remove(f"{user_id}.{file_format}")
|
||||
|
||||
# Handle empty transcript
|
||||
if transcript["text"] == "":
|
||||
transcript["text"] = "[Silence]"
|
||||
|
||||
# If whisper_to_chat is enabled, send transcript to ChatGPT and send its response along with the transcript
|
||||
chatGPT_response = False
|
||||
if audioMessage and user_data["options"]["whisper_to_chat"]:
|
||||
chatGPT_response, user_data = await messageGPT(
|
||||
transcript["text"], str(chat_id), message.from_user.full_name, user_data
|
||||
)
|
||||
transcript["text"] = "> " + transcript["text"] + "\n\n" + chatGPT_response
|
||||
transcript["text"] = ''.join(["> ", transcript["text"], "\n\n", chatGPT_response])
|
||||
|
||||
# Send transcript (and ChatGPT response if enabled)
|
||||
await message.reply(transcript["text"])
|
||||
|
||||
# Send ChatGPT response as voice message with piper TTS if enabled
|
||||
if user_data["options"]["assistant_voice_chat"] and chatGPT_response:
|
||||
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
||||
voice_data = await text_to_voice(chatGPT_response)
|
||||
await message.reply_voice(voice_data)
|
||||
|
||||
asyncio.create_task(send_voice_message(chat_id, chatGPT_response))
|
||||
|
||||
# Update user data
|
||||
database.update_user(str(chat_id), user_data)
|
||||
|
||||
|
||||
|
@ -420,9 +431,7 @@ async def chat(message: types.Message):
|
|||
await message.reply(assistant_message, parse_mode=ParseMode.MARKDOWN)
|
||||
|
||||
if user_data["options"]["assistant_voice_chat"]:
|
||||
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
||||
voice_data = await text_to_voice(assistant_message)
|
||||
await message.reply_voice(voice_data)
|
||||
asyncio.create_task(send_voice_message(chat_id, assistant_message))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Ładowanie…
Reference in New Issue