kopia lustrzana https://codeberg.org/pluja/openai-telegram-bot
Use piper for realistic TTS, remove google tts
rodzic
92fc09617a
commit
f5947f03d9
|
@ -1,4 +1,11 @@
|
||||||
venv/
|
venv/
|
||||||
.env
|
.env
|
||||||
*.db
|
*.db
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
db_data
|
||||||
|
*.ogg
|
||||||
|
piper/*.so*
|
||||||
|
piper/piper
|
||||||
|
piper/espeak*
|
||||||
|
piper/voices
|
||||||
|
MODEL_CARD
|
12
Dockerfile
12
Dockerfile
|
@ -1,8 +1,16 @@
|
||||||
FROM python:3.10-slim
|
FROM python:3.10-slim
|
||||||
|
|
||||||
RUN apt update && apt install -y ffmpeg libespeak1
|
# Set the voice language
|
||||||
|
ARG VOICE_LANGUAGE=en
|
||||||
|
|
||||||
|
RUN apt update && apt install -y ffmpeg wget libespeak1
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
COPY ./entrypoint.sh /app
|
||||||
|
RUN chmod +x /app/entrypoint.sh
|
||||||
|
|
||||||
|
COPY ./piper /app/piper
|
||||||
|
|
||||||
COPY ./main.py /app
|
COPY ./main.py /app
|
||||||
COPY ./database.py /app
|
COPY ./database.py /app
|
||||||
COPY ./requirements.txt /app
|
COPY ./requirements.txt /app
|
||||||
|
@ -11,4 +19,4 @@ RUN mkdir db_data
|
||||||
|
|
||||||
RUN pip install --upgrade pip
|
RUN pip install --upgrade pip
|
||||||
RUN pip install -r requirements.txt
|
RUN pip install -r requirements.txt
|
||||||
CMD [ "python3", "/app/main.py" ]
|
ENTRYPOINT [ "/app/entrypoint.sh" ]
|
|
@ -10,7 +10,7 @@ A telegram bot to interact with OpenAI API. You can:
|
||||||
- Voice chat with ChatGPT:
|
- Voice chat with ChatGPT:
|
||||||
- Send voice message.
|
- Send voice message.
|
||||||
- Receive voice messages.
|
- Receive voice messages.
|
||||||
- Use GoogleTTS or 100% local Espeak (more robotic).
|
- Use 100% local Text-To-Speech with Language Recognition to give ChatGPT a voice in many languages!
|
||||||
|
|
||||||
Other features include:
|
Other features include:
|
||||||
|
|
||||||
|
@ -53,7 +53,7 @@ Self hosting this chatbot is pretty easy. You just need to follow this steps:
|
||||||
- Set your ALLOWED_USERS (comma separated user ids). Set it to `*` to allow all users.
|
- Set your ALLOWED_USERS (comma separated user ids). Set it to `*` to allow all users.
|
||||||
- Set the SYSTEM_PROMPT for ChatGPT. This is always instructed to ChatGPT as the system.
|
- Set the SYSTEM_PROMPT for ChatGPT. This is always instructed to ChatGPT as the system.
|
||||||
- Optional: Edit the MAX_CONTEXT. This variable sets the number of messages that will be sent to ChatGPT API as context for the conversation.
|
- Optional: Edit the MAX_CONTEXT. This variable sets the number of messages that will be sent to ChatGPT API as context for the conversation.
|
||||||
- WHISPER_TO_CHAT allows you to choose whether Whisper transcripts should be sent to ChatGPT or not.
|
- WHISPER_TO_GPT allows you to choose whether Whisper transcripts should be sent to ChatGPT or not.
|
||||||
- You can also configure this using `/settings` in chat.
|
- You can also configure this using `/settings` in chat.
|
||||||
- ENABLE_GOOGLE_TTS: if enabled, the TTS service will be provided by GoogleTTS, producing more natural voices. If disabled, it falls back to local voice generation using Espeak.
|
- ENABLE_GOOGLE_TTS: if enabled, the TTS service will be provided by GoogleTTS, producing more natural voices. If disabled, it falls back to local voice generation using Espeak.
|
||||||
- VOICE_LANGUAGE country code for the default voice accent.
|
- VOICE_LANGUAGE country code for the default voice accent.
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
#!/bin/bash
#
# Container entrypoint: runs the piper installer script, then starts the bot.

echo "Installing piper for text to voice conversion..."
bash /app/piper/init-piper.sh

echo "Bot starting..."
# exec replaces this shell with the Python process, so the bot itself
# receives the signals sent to the container (e.g. SIGTERM on stop)
# instead of sitting behind an intermediate bash process.
# -u keeps stdout/stderr unbuffered so log lines appear immediately.
exec python3 -u /app/main.py
|
11
example.env
11
example.env
|
@ -6,10 +6,13 @@ CHATGPT_MAX_USER_CONTEXT=5
|
||||||
CHATGPT_TEMPERATURE=1.0
|
CHATGPT_TEMPERATURE=1.0
|
||||||
|
|
||||||
# Use Whisper transcript from voice message with ChatGPT
|
# Use Whisper transcript from voice message with ChatGPT
|
||||||
WHISPER_TO_CHAT=1
|
WHISPER_TO_GPT=1
|
||||||
# Use Google TTS for speech to text
|
|
||||||
ENABLE_GOOGLE_TTS=0
|
# TTS Options
|
||||||
VOICE_LANGUAGE=en # en, es, fr, de, it, pt, ru, ja, ko
|
ENABLE_TTS=1
|
||||||
|
# If ENABLE_TTS=1, you can set the following options
|
||||||
|
VOICE_LANGUAGE_LIST=en,es,fr,it,pt,ca
|
||||||
|
DEFAULT_VOICE_LANGUAGE=en
|
||||||
|
|
||||||
BOT_TOKEN=your-telegram-bot-token
|
BOT_TOKEN=your-telegram-bot-token
|
||||||
BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs
|
BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs
|
||||||
|
|
81
main.py
81
main.py
|
@ -1,19 +1,19 @@
|
||||||
import asyncio
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
|
||||||
import openai
|
import openai
|
||||||
import pyttsx3
|
|
||||||
from aiogram import Bot, Dispatcher, types
|
from aiogram import Bot, Dispatcher, types
|
||||||
from aiogram.contrib.middlewares.logging import LoggingMiddleware
|
from aiogram.contrib.middlewares.logging import LoggingMiddleware
|
||||||
from aiogram.types import InlineKeyboardButton, InlineKeyboardMarkup, ParseMode
|
from aiogram.types import InlineKeyboardButton, InlineKeyboardMarkup, ParseMode
|
||||||
from aiogram.utils import executor
|
from aiogram.utils import executor
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from gtts import gTTS
|
|
||||||
from pydub import AudioSegment
|
from pydub import AudioSegment
|
||||||
|
from langdetect import detect
|
||||||
|
|
||||||
import database
|
import database
|
||||||
|
|
||||||
|
@ -39,9 +39,13 @@ ALLOWED_USERS = os.environ.get("BOT_ALLOWED_USERS").split(",")
|
||||||
SYSTEM_PROMPT = os.environ.get("CHATGPT_SYSTEM_PROMPT")
|
SYSTEM_PROMPT = os.environ.get("CHATGPT_SYSTEM_PROMPT")
|
||||||
TEMPERATURE = os.environ.get("CHATGPT_TEMPERATURE")
|
TEMPERATURE = os.environ.get("CHATGPT_TEMPERATURE")
|
||||||
MODEL = os.environ.get("OPENAI_MODEL")
|
MODEL = os.environ.get("OPENAI_MODEL")
|
||||||
WHISPER_TO_CHAT = bool(int(os.environ.get("WHISPER_TO_CHAT")))
|
WHISPER_TO_GPT = bool(int(os.environ.get("WHISPER_TO_GPT")))
|
||||||
ENABLE_GOOGLE_TTS = bool(int(os.environ.get("ENABLE_GOOGLE_TTS")))
|
|
||||||
VOICE_LANGUAGE = os.environ.get("VOICE_LANGUAGE")
|
# TTS Settings
|
||||||
|
ENABLE_TTS = bool(int(os.environ.get("ENABLE_TTS")))
|
||||||
|
DEFAULT_VOICE_LANGUAGE = os.environ.get("DEFAULT_VOICE_LANGUAGE")
|
||||||
|
VOICE_LANGUAGE_LIST = os.environ.get("VOICE_LANGUAGE_LIST")
|
||||||
|
|
||||||
MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT"))
|
MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT"))
|
||||||
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
|
|
||||||
|
@ -52,7 +56,7 @@ async def getUserData(chat_id):
|
||||||
"context": [],
|
"context": [],
|
||||||
"usage": {"chatgpt": 0, "whisper": 0, "dalle": 0},
|
"usage": {"chatgpt": 0, "whisper": 0, "dalle": 0},
|
||||||
"options": {
|
"options": {
|
||||||
"whisper_to_chat": WHISPER_TO_CHAT,
|
"whisper_to_chat": WHISPER_TO_GPT,
|
||||||
"assistant_voice_chat": False,
|
"assistant_voice_chat": False,
|
||||||
"temperature": float(TEMPERATURE),
|
"temperature": float(TEMPERATURE),
|
||||||
"max-context": MAX_USER_CONTEXT
|
"max-context": MAX_USER_CONTEXT
|
||||||
|
@ -83,43 +87,34 @@ def generate_settings_markup(chat_id: str) -> InlineKeyboardMarkup:
|
||||||
]
|
]
|
||||||
return InlineKeyboardMarkup(inline_keyboard=keyboard)
|
return InlineKeyboardMarkup(inline_keyboard=keyboard)
|
||||||
|
|
||||||
def change_voice(engine, gender='male'):
|
async def text_to_voice(text: str, language: str = None) -> BytesIO:
|
||||||
for voice in engine.getProperty('voices'):
|
binary_path = "/home/whoami/PROJECTS/openai-telegram-bot/piper/piper"
|
||||||
if VOICE_LANGUAGE in voice.languages[0].decode('utf-8') and gender == voice.gender:
|
if language is None:
|
||||||
engine.setProperty('voice', voice.id)
|
language = detect(text[0:80])
|
||||||
return True
|
|
||||||
|
|
||||||
async def text_to_voice(text: str) -> BytesIO:
|
|
||||||
with tempfile.NamedTemporaryFile(mode='wb', suffix='.ogg', delete=False) as ogg_file:
|
|
||||||
temp_filename = ogg_file.name
|
|
||||||
voice_done = False
|
|
||||||
|
|
||||||
# If Google TTS is enabled, try to use it first
|
model_path = f"/home/whoami/PROJECTS/openai-telegram-bot/piper/voices/{language}.onnx"
|
||||||
if ENABLE_GOOGLE_TTS:
|
# Generate a unique temporary filename with '.ogg' extension
|
||||||
try:
|
with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tmp:
|
||||||
tts = gTTS(text, lang=VOICE_LANGUAGE)
|
tmp_filename = tmp.name
|
||||||
tts.save(temp_filename)
|
|
||||||
voice_done = True
|
text = text.replace('"', "")
|
||||||
except Exception as e:
|
# Make the text be in a single line
|
||||||
print("Google TTS failed, falling back to pyttsx3: --> ", e)
|
text = text.replace("\n", " ")
|
||||||
|
# Construct the command to execute the binary
|
||||||
# If Google TTS is disabled or failed, use pyttsx3
|
cmd = f"echo '{text}' | {binary_path} --model {model_path} --output_file {tmp_filename}"
|
||||||
if not voice_done:
|
|
||||||
engine = pyttsx3.init()
|
# Run the binary and wait for it to finish
|
||||||
change_voice(engine)
|
subprocess.run(cmd, shell=True, check=True)
|
||||||
engine.setProperty('rate', 160)
|
|
||||||
engine.save_to_file(text, temp_filename)
|
# Open the file in binary mode and read its content into BytesIO object
|
||||||
engine.runAndWait()
|
with open(tmp_filename, 'rb') as file:
|
||||||
engine.stop()
|
bytes_io = BytesIO(file.read())
|
||||||
# Add a small delay before reading the file
|
|
||||||
await asyncio.sleep(1)
|
|
||||||
|
|
||||||
with open(temp_filename, "rb") as audio_file:
|
# Delete the temporary file
|
||||||
voice_data = BytesIO(audio_file.read())
|
os.remove(tmp_filename)
|
||||||
|
|
||||||
os.remove(temp_filename)
|
# Return the BytesIO object
|
||||||
voice_data.seek(0)
|
return bytes_io
|
||||||
return voice_data
|
|
||||||
|
|
||||||
|
|
||||||
def restricted(func):
|
def restricted(func):
|
||||||
|
@ -364,7 +359,7 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
print(f"Allowed users: {ALLOWED_USERS}")
|
print(f"Allowed users: {ALLOWED_USERS}")
|
||||||
print(f"System prompt: {SYSTEM_PROMPT}")
|
print(f"System prompt: {SYSTEM_PROMPT}")
|
||||||
print(f"Google TTS: {ENABLE_GOOGLE_TTS}")
|
print(f"TTS: {ENABLE_TTS}")
|
||||||
|
|
||||||
# Register message handler and callback query handler for settings
|
# Register message handler and callback query handler for settings
|
||||||
dp.register_message_handler(settings, commands=['settings'])
|
dp.register_message_handler(settings, commands=['settings'])
|
||||||
|
|
|
@ -0,0 +1,81 @@
|
||||||
|
#!/bin/bash
#
# Installs the piper text-to-speech binary (release v0.0.2) and the voices
# requested in VOICE_LANGUAGE_LIST (comma separated language codes) when
# ENABLE_TTS=1. All paths are relative to the current working directory:
# the binary ends up in ./piper/ and the voices in ./piper/voices/<lang>.onnx.
#
# The script is safe to run repeatedly: anything already installed is skipped.

RELEASE_URL="https://github.com/rhasspy/piper/releases/download/v0.0.2"

# download_voice LANG LABEL VOICE
#   LANG   short language code used as the installed file name (e.g. "en")
#   LABEL  human readable language name used in log messages
#   VOICE  upstream voice name; the release archive is voice-VOICE.tar.gz and
#          contains VOICE.onnx and VOICE.onnx.json
# Returns non-zero if the download or extraction fails.
download_voice() {
    local lang="$1" label="$2" voice="$3"
    local archive="voice-${voice}.tar.gz"

    if [ -f "piper/voices/${lang}.onnx" ] && [ -f "piper/voices/${lang}.onnx.json" ]; then
        echo "The ${label} voice is already installed, skipping."
        return 0
    fi

    echo "Downloading ${label} voice..."
    if ! wget -q "${RELEASE_URL}/${archive}"; then
        echo "Failed to download the ${label} voice (${archive})" >&2
        rm -f "$archive"
        return 1
    fi
    if ! tar -xf "$archive"; then
        echo "Failed to extract ${archive}" >&2
        rm -f "$archive"
        return 1
    fi
    # Install under the short language code so the bot can look voices up
    # as piper/voices/<lang>.onnx.
    mv "${voice}.onnx" "piper/voices/${lang}.onnx"
    mv "${voice}.onnx.json" "piper/voices/${lang}.onnx.json"
    rm -f "$archive"
    echo "Done"
}

# Settings may come from a local .env file or directly from the environment
# (e.g. when running inside a container where .env is not present).
if [ -f .env ]; then
    source .env
fi

if [ "$ENABLE_TTS" = 1 ]; then
    echo "Installing piper for text to voice conversion..."
    if [ -x piper/piper ]; then
        echo "piper is already installed, skipping download."
    else
        echo "Downloading piper v0.0.2.."
        if ! wget -q "${RELEASE_URL}/piper_amd64.tar.gz"; then
            echo "Failed to download piper" >&2
            rm -f piper_amd64.tar.gz
            exit 1
        fi
        echo "Extracting piper"
        tar -xf piper_amd64.tar.gz

        echo "Installing piper"
        rm -rf piper_amd64
        rm -f piper_amd64.tar.gz
        # NOTE(review): 777 is kept from the original script; it is far more
        # permissive than needed -- consider tightening.
        chmod -R 777 ./piper/
    fi
    # -p: do not fail when the directory survives from a previous run.
    mkdir -p piper/voices

    echo "Downloading tts voices from VOICE_LANGUAGE_LIST..."
    echo "This can take a while..."
    # Unquoted on purpose: commas become spaces and word splitting yields
    # one language code per iteration (also trimming stray whitespace).
    for lang in ${VOICE_LANGUAGE_LIST//,/ }; do
        case "$lang" in
            en) download_voice en english en-us-ryan-high ;;
            es) download_voice es spanish es-mls_10246-low ;;
            fr) download_voice fr french fr-siwis-medium ;;
            it) download_voice it italian it-riccardo_fasol-x-low ;;
            pt) download_voice pt portuguese pt-br-edresson-low ;;
            ca) download_voice ca catalan ca-upc_ona-x-low ;;
            *) echo "No voice is configured for language '${lang}', skipping." >&2 ;;
        esac
    done
    echo "Done. Piper installed!"
else
    echo "TTS Disabled. No work to do..."
fi
|
|
@ -1,6 +1,5 @@
|
||||||
aiogram==2.25.1
|
aiogram==2.25.1
|
||||||
gTTS==2.3.1
|
langdetect==1.0.9
|
||||||
openai==0.27.2
|
openai==0.27.2
|
||||||
pydub==0.25.1
|
pydub==0.25.1
|
||||||
python-dotenv==1.0.0
|
python-dotenv==1.0.0
|
||||||
pyttsx3==2.90
|
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
import os
|
||||||
|
|
||||||
|
def text_to_speech(text: str) -> str:
    """Synthesize *text* with the local piper binary and return the audio path.

    The text is written to piper's standard input and the audio is written
    to a freshly created temporary file, which the caller is responsible
    for deleting.

    Args:
        text: The text to speak. It is passed to piper verbatim; no shell is
            involved, so quotes and other shell metacharacters are safe.

    Returns:
        Path of the temporary file piper was told to write (``--output_file``).

    Raises:
        subprocess.CalledProcessError: If piper cannot be started or exits
            with a non-zero status. The temporary file is removed first.
    """
    binary_path = "./piper"
    model_path = "blizzard_lessac-medium.onnx"

    # Reserve a unique output path; delete=False keeps the file on disk
    # after the handle is closed so piper can write to it.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp_filename = tmp.name

    # Argument list + stdin instead of an `echo '...' | piper` shell string:
    # the text can no longer break out of the quoting or inject commands.
    cmd = [binary_path, "--model", model_path, "--output_file", tmp_filename]
    try:
        # Trailing newline mirrors what `echo` used to append.
        subprocess.run(cmd, input=(text + "\n").encode("utf-8"), check=True)
    except OSError as err:
        os.remove(tmp_filename)
        # The shell-based version reported a missing binary as a failed
        # command (exit status 127); keep that exception type for callers.
        raise subprocess.CalledProcessError(127, cmd) from err
    except subprocess.CalledProcessError:
        # Do not leave the reserved output file behind on failure.
        os.remove(tmp_filename)
        raise

    return tmp_filename
|
Ładowanie…
Reference in New Issue