kopia lustrzana https://codeberg.org/pluja/openai-telegram-bot
Bug fixes and optimizations
rodzic
62d1aef168
commit
edf2131a2e
21
example.env
21
example.env
|
@ -1,3 +1,6 @@
|
||||||
|
# # # # # # # # # #
|
||||||
|
# OpenAI / ChatGPT #
|
||||||
|
# # # # # # # # # #
|
||||||
OPENAI_API_KEY=your-openai-api-key
|
OPENAI_API_KEY=your-openai-api-key
|
||||||
OPENAI_MODEL=gpt-3.5-turbo
|
OPENAI_MODEL=gpt-3.5-turbo
|
||||||
|
|
||||||
|
@ -8,11 +11,21 @@ CHATGPT_TEMPERATURE=1.0
|
||||||
# Use Whisper transcript from voice message with ChatGPT
|
# Use Whisper transcript from voice message with ChatGPT
|
||||||
WHISPER_TO_GPT=1
|
WHISPER_TO_GPT=1
|
||||||
|
|
||||||
# TTS Options
|
# # # # # # # #
|
||||||
|
# TTS Options #
|
||||||
|
# # # # # # # #
|
||||||
ENABLE_TTS=1
|
ENABLE_TTS=1
|
||||||
# If USE_TTS=1, you can set the following options
|
|
||||||
VOICE_LANGUAGE_LIST=en,es,fr,it,pt,ca
|
|
||||||
DEFAULT_VOICE_LANGUAGE=en
|
|
||||||
|
|
||||||
|
# If ENABLE_TTS=1, you can set the following options
|
||||||
|
# Remove any language whose voice you don't want downloaded
|
||||||
|
VOICE_LANGUAGE_LIST=en,es,fr,it,pt,ca,no,nl,de
|
||||||
|
DEFAULT_VOICE_LANGUAGE=en
|
||||||
|
|
||||||
|
# Do not change this line
|
||||||
|
LANGUAGES_H_SR=en,fr,nl,no
|
||||||
|
|
||||||
|
# # # # # # # # # #
|
||||||
|
# Telegram Options #
|
||||||
|
# # # # # # # # # #
|
||||||
BOT_TOKEN=your-telegram-bot-token
|
BOT_TOKEN=your-telegram-bot-token
|
||||||
BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs
|
BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs
|
||||||
|
|
91
main.py
91
main.py
|
@ -1,10 +1,10 @@
|
||||||
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import wave
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
from typing import Tuple
|
||||||
import subprocess
|
|
||||||
|
|
||||||
import openai
|
import openai
|
||||||
from aiogram import Bot, Dispatcher, types
|
from aiogram import Bot, Dispatcher, types
|
||||||
|
@ -12,8 +12,8 @@ from aiogram.contrib.middlewares.logging import LoggingMiddleware
|
||||||
from aiogram.types import InlineKeyboardButton, InlineKeyboardMarkup, ParseMode
|
from aiogram.types import InlineKeyboardButton, InlineKeyboardMarkup, ParseMode
|
||||||
from aiogram.utils import executor
|
from aiogram.utils import executor
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from pydub import AudioSegment
|
|
||||||
from langdetect import detect
|
from langdetect import detect
|
||||||
|
from pydub import AudioSegment
|
||||||
|
|
||||||
import database
|
import database
|
||||||
|
|
||||||
|
@ -46,6 +46,7 @@ WHISPER_TO_GPT = bool(int(os.environ.get("WHISPER_TO_GPT")))
|
||||||
ENABLE_TTS = bool(int(os.environ.get("ENABLE_TTS")))
|
ENABLE_TTS = bool(int(os.environ.get("ENABLE_TTS")))
|
||||||
DEFAULT_VOICE_LANGUAGE = os.environ.get("DEFAULT_VOICE_LANGUAGE")
|
DEFAULT_VOICE_LANGUAGE = os.environ.get("DEFAULT_VOICE_LANGUAGE")
|
||||||
VOICE_LANGUAGE_LIST = os.environ.get("VOICE_LANGUAGE_LIST")
|
VOICE_LANGUAGE_LIST = os.environ.get("VOICE_LANGUAGE_LIST")
|
||||||
|
LANGUAGES_22050 = os.environ.get("LANGUAGES_H_SR").split(",")
|
||||||
|
|
||||||
MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT"))
|
MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT"))
|
||||||
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
|
@ -107,7 +108,15 @@ def generate_settings_markup(chat_id: str) -> InlineKeyboardMarkup:
|
||||||
return InlineKeyboardMarkup(inline_keyboard=keyboard)
|
return InlineKeyboardMarkup(inline_keyboard=keyboard)
|
||||||
|
|
||||||
|
|
||||||
async def text_to_voice(text: str, language: str = None) -> BytesIO:
|
async def send_voice_message(chat_id, assistant_message):
|
||||||
|
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
||||||
|
audio_data, _ = await text_to_voice(assistant_message)
|
||||||
|
audio_data.seek(0) # Reset the buffer's position to the beginning
|
||||||
|
|
||||||
|
await bot.send_voice(chat_id, audio_data)
|
||||||
|
|
||||||
|
|
||||||
|
async def text_to_voice(text: str, language: str = None) -> Tuple[BytesIO, str]:
|
||||||
binary_path = "./piper/piper"
|
binary_path = "./piper/piper"
|
||||||
|
|
||||||
if language is None:
|
if language is None:
|
||||||
|
@ -115,34 +124,30 @@ async def text_to_voice(text: str, language: str = None) -> BytesIO:
|
||||||
|
|
||||||
model_path = f"./piper/voices/{language}.onnx"
|
model_path = f"./piper/voices/{language}.onnx"
|
||||||
|
|
||||||
# Generate a unique temporary filename with '.ogg' extension
|
text = text.replace("\n", ". ")
|
||||||
with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tmp:
|
|
||||||
tmp_filename = tmp.name
|
|
||||||
|
|
||||||
# Run the binary with the escaped text as input and the temp file as output
|
cmd = [binary_path, "--model", model_path, "--output_raw"]
|
||||||
with open(tmp_filename, "wb") as tmp_file:
|
proc = await asyncio.create_subprocess_exec(
|
||||||
process = subprocess.Popen(
|
*cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE
|
||||||
[binary_path, "--model", model_path, "--output_file", "-"],
|
)
|
||||||
stdin=subprocess.PIPE,
|
|
||||||
stdout=tmp_file,
|
|
||||||
stderr=subprocess.PIPE,
|
|
||||||
text=True,
|
|
||||||
encoding="utf8",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Remove all newlines from the text so that the text is read as a single sentence
|
|
||||||
text = text.replace("\n", ". ")
|
|
||||||
process.communicate(input=text)
|
|
||||||
|
|
||||||
# Open the file in binary mode and read its content into BytesIO object
|
stdout, _ = await proc.communicate(input=text.encode("utf-8"))
|
||||||
with open(tmp_filename, "rb") as file:
|
|
||||||
bytes_io = BytesIO(file.read())
|
|
||||||
|
|
||||||
# Delete the temporary file
|
# Create a new BytesIO object to store the WAV file.
|
||||||
os.remove(tmp_filename)
|
wav_file = BytesIO()
|
||||||
|
with wave.open(wav_file, "wb") as wf:
|
||||||
|
# Assuming 1 channel and 16 bits per sample; the sample rate is 22050 Hz for languages in LANGUAGES_22050 and 16000 Hz otherwise.
|
||||||
|
wf.setnchannels(1)
|
||||||
|
wf.setsampwidth(2)
|
||||||
|
wf.setframerate(22050 if language in LANGUAGES_22050 else 16000)
|
||||||
|
wf.writeframes(stdout)
|
||||||
|
|
||||||
# Return the BytesIO object
|
wav_file.seek(0)
|
||||||
return bytes_io
|
|
||||||
|
audio = AudioSegment.from_file(wav_file, format="wav")
|
||||||
|
audio = audio.export(format="ogg", codec="libopus", parameters=["-vbr", "on"])
|
||||||
|
|
||||||
|
return BytesIO(audio.read()), "ogg"
|
||||||
|
|
||||||
|
|
||||||
def restricted(func):
|
def restricted(func):
|
||||||
|
@ -254,7 +259,7 @@ async def usage(message: types.Message) -> None:
|
||||||
- Generated {user_usage["dalle"]} images with DALL-E.
|
- Generated {user_usage["dalle"]} images with DALL-E.
|
||||||
- Transcribed {round(float(user_usage["whisper"]) / 60.0, 2)}min with Whisper.
|
- Transcribed {round(float(user_usage["whisper"]) / 60.0, 2)}min with Whisper.
|
||||||
|
|
||||||
Total spent: ${user_spent} ({user_percentage:.2f}% of total)
|
User total: ${user_spent} ({user_percentage:.2f}% of total)
|
||||||
|
|
||||||
Total usage:
|
Total usage:
|
||||||
- ChatGPT tokens: {total_usage["chatgpt"]}
|
- ChatGPT tokens: {total_usage["chatgpt"]}
|
||||||
|
@ -297,9 +302,9 @@ async def attachment(message: types.Message):
|
||||||
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
||||||
|
|
||||||
transcript = {"text": ""}
|
transcript = {"text": ""}
|
||||||
|
|
||||||
audioMessage = False
|
audioMessage = False
|
||||||
|
|
||||||
|
# Handle media types
|
||||||
if message.voice:
|
if message.voice:
|
||||||
user_data["usage"]["whisper"] += message.voice.duration
|
user_data["usage"]["whisper"] += message.voice.duration
|
||||||
file_id = message.voice.file_id
|
file_id = message.voice.file_id
|
||||||
|
@ -317,16 +322,19 @@ async def attachment(message: types.Message):
|
||||||
await message.reply("Can't handle such file. Reason: unknown.")
|
await message.reply("Can't handle such file. Reason: unknown.")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Download file from Telegram
|
||||||
file = await bot.get_file(file_id)
|
file = await bot.get_file(file_id)
|
||||||
user_id = message.chat.id
|
user_id = message.chat.id
|
||||||
await file.download(f"{user_id}.{file_format}")
|
await file.download(f"{user_id}.{file_format}")
|
||||||
|
|
||||||
|
# Convert audio to mp3 if needed, because OpenAI doesn't support ogg
|
||||||
if file_format == "ogg":
|
if file_format == "ogg":
|
||||||
ogg_audio = AudioSegment.from_file(f"{user_id}.ogg", format="ogg")
|
ogg_audio = AudioSegment.from_file(f"{user_id}.ogg", format="ogg")
|
||||||
ogg_audio.export(f"{user_id}.mp3", format="mp3")
|
ogg_audio.export(f"{user_id}.mp3", format="mp3")
|
||||||
os.remove(f"{user_id}.ogg")
|
os.remove(f"{user_id}.ogg")
|
||||||
file_format = "mp3"
|
file_format = "mp3"
|
||||||
|
|
||||||
|
# Transcribe audio with OpenAI API
|
||||||
with open(f"{user_id}.{file_format}", "rb") as audio_file:
|
with open(f"{user_id}.{file_format}", "rb") as audio_file:
|
||||||
try:
|
try:
|
||||||
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
||||||
|
@ -336,25 +344,28 @@ async def attachment(message: types.Message):
|
||||||
await message.reply("Transcript failed.")
|
await message.reply("Transcript failed.")
|
||||||
os.remove(f"{user_id}.{file_format}")
|
os.remove(f"{user_id}.{file_format}")
|
||||||
return
|
return
|
||||||
|
os.remove(f"{user_id}.{file_format}")
|
||||||
|
|
||||||
os.remove(f"{user_id}.{file_format}")
|
# Handle empty transcript
|
||||||
|
|
||||||
if transcript["text"] == "":
|
if transcript["text"] == "":
|
||||||
transcript["text"] = "[Silence]"
|
transcript["text"] = "[Silence]"
|
||||||
|
|
||||||
|
# If whisper_to_chat is enabled, send transcript to ChatGPT and send its response along with the transcript
|
||||||
chatGPT_response = False
|
chatGPT_response = False
|
||||||
if audioMessage and user_data["options"]["whisper_to_chat"]:
|
if audioMessage and user_data["options"]["whisper_to_chat"]:
|
||||||
chatGPT_response, user_data = await messageGPT(
|
chatGPT_response, user_data = await messageGPT(
|
||||||
transcript["text"], str(chat_id), message.from_user.full_name, user_data
|
transcript["text"], str(chat_id), message.from_user.full_name, user_data
|
||||||
)
|
)
|
||||||
transcript["text"] = "> " + transcript["text"] + "\n\n" + chatGPT_response
|
transcript["text"] = ''.join(["> ", transcript["text"], "\n\n", chatGPT_response])
|
||||||
|
|
||||||
|
# Send transcript (and ChatGPT response if enabled)
|
||||||
await message.reply(transcript["text"])
|
await message.reply(transcript["text"])
|
||||||
|
|
||||||
|
# Send ChatGPT response as voice message with piper TTS if enabled
|
||||||
if user_data["options"]["assistant_voice_chat"] and chatGPT_response:
|
if user_data["options"]["assistant_voice_chat"] and chatGPT_response:
|
||||||
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
asyncio.create_task(send_voice_message(chat_id, chatGPT_response))
|
||||||
voice_data = await text_to_voice(chatGPT_response)
|
|
||||||
await message.reply_voice(voice_data)
|
# Update user data
|
||||||
|
|
||||||
database.update_user(str(chat_id), user_data)
|
database.update_user(str(chat_id), user_data)
|
||||||
|
|
||||||
|
|
||||||
|
@ -420,9 +431,7 @@ async def chat(message: types.Message):
|
||||||
await message.reply(assistant_message, parse_mode=ParseMode.MARKDOWN)
|
await message.reply(assistant_message, parse_mode=ParseMode.MARKDOWN)
|
||||||
|
|
||||||
if user_data["options"]["assistant_voice_chat"]:
|
if user_data["options"]["assistant_voice_chat"]:
|
||||||
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
asyncio.create_task(send_voice_message(chat_id, assistant_message))
|
||||||
voice_data = await text_to_voice(assistant_message)
|
|
||||||
await message.reply_voice(voice_data)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
Ładowanie…
Reference in New Issue