Use piper for realistic TTS, remove google tts

main
pluja 2023-04-28 15:48:52 +02:00
rodzic 92fc09617a
commit f5947f03d9
9 zmienionych plików z 174 dodań i 54 usunięć

9
.gitignore vendored
Wyświetl plik

@ -1,4 +1,11 @@
venv/
.env
*.db
__pycache__/
__pycache__/
db_data
*.ogg
piper/*.so*
piper/piper
piper/espeak*
piper/voices
MODEL_CARD

Wyświetl plik

@ -1,8 +1,16 @@
FROM python:3.10-slim
RUN apt update && apt install -y ffmpeg libespeak1
# Set the voice language
ARG VOICE_LANGUAGE=en
RUN apt update && apt install -y ffmpeg wget libespeak1
WORKDIR /app
COPY ./entrypoint.sh /app
RUN chmod +x /app/entrypoint.sh
COPY ./piper /app/piper
COPY ./main.py /app
COPY ./database.py /app
COPY ./requirements.txt /app
@ -11,4 +19,4 @@ RUN mkdir db_data
RUN pip install --upgrade pip
RUN pip install -r requirements.txt
CMD [ "python3", "/app/main.py" ]
ENTRYPOINT [ "/app/entrypoint.sh" ]

Wyświetl plik

@ -10,7 +10,7 @@ A telegram bot to interact with OpenAI API. You can:
- Voice chat with ChatGPT:
- Send voice message.
- Receive voice messages.
- Use GoogleTTS or 100% local Espeak (more robotic).
- Use 100% local Text-To-Speech with Language Recognition to give ChatGPT a voice in many languages!
Other features include:
@ -53,7 +53,7 @@ Self hosting this chatbot is pretty easy. You just need to follow this steps:
- Set your ALLOWED_USERS (comma separated user ids). Set it to `*` to allow all users.
- Set the SYSTEM_PROMPT for ChatGPT. This is always instructed to ChatGPT as the system.
- Optional: Edit the MAX_CONTEXT. This variable sets the number of messages that will be sent to ChatGPT API as context for the conversation.
- WHISPER_TO_CHAT allows you to choose wether Whisper transcripts should be instructed to ChatGPT or not.
- WHISPER_TO_GPT allows you to choose whether Whisper transcripts should be sent to ChatGPT or not.
- You can also configure this using `/settings` in chat.
- ENABLE_GOOGLE_TTS: when enabled, the TTS service is provided by GoogleTTS, producing more natural voices. If disabled, it falls back to local voice generation using Espeak.
- VOICE_LANGUAGE country code for the default voice accent.

7
entrypoint.sh 100644
Wyświetl plik

@ -0,0 +1,7 @@
#!/bin/bash
# Container entrypoint: run the piper install script, then start the bot.
echo "Installing piper for text to voice conversion..."
bash /app/piper/init-piper.sh
echo "Bot starting..."
# exec replaces this shell with the Python process, so the bot receives
# signals (e.g. SIGTERM when the container is stopped) directly instead of
# them being delivered to an intermediate bash that does not forward them.
# -u keeps stdout/stderr unbuffered.
exec python3 -u /app/main.py

Wyświetl plik

@ -6,10 +6,13 @@ CHATGPT_MAX_USER_CONTEXT=5
CHATGPT_TEMPERATURE=1.0
# Use Whisper transcript from voice message with ChatGPT
WHISPER_TO_CHAT=1
# Use Google TTS for speech to text
ENABLE_GOOGLE_TTS=0
VOICE_LANGUAGE=en # en, es, fr, de, it, pt, ru, ja, ko
WHISPER_TO_GPT=1
# TTS Options
ENABLE_TTS=1
# If ENABLE_TTS=1, you can set the following options
VOICE_LANGUAGE_LIST=en,es,fr,it,pt,ca
DEFAULT_VOICE_LANGUAGE=en
BOT_TOKEN=your-telegram-bot-token
BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs

81
main.py
Wyświetl plik

@ -1,19 +1,19 @@
import asyncio
import logging
import os
import tempfile
from functools import wraps
from io import BytesIO
import subprocess
import openai
import pyttsx3
from aiogram import Bot, Dispatcher, types
from aiogram.contrib.middlewares.logging import LoggingMiddleware
from aiogram.types import InlineKeyboardButton, InlineKeyboardMarkup, ParseMode
from aiogram.utils import executor
from dotenv import load_dotenv
from gtts import gTTS
from pydub import AudioSegment
from langdetect import detect
import database
@ -39,9 +39,13 @@ ALLOWED_USERS = os.environ.get("BOT_ALLOWED_USERS").split(",")
SYSTEM_PROMPT = os.environ.get("CHATGPT_SYSTEM_PROMPT")
TEMPERATURE = os.environ.get("CHATGPT_TEMPERATURE")
MODEL = os.environ.get("OPENAI_MODEL")
WHISPER_TO_CHAT = bool(int(os.environ.get("WHISPER_TO_CHAT")))
ENABLE_GOOGLE_TTS = bool(int(os.environ.get("ENABLE_GOOGLE_TTS")))
VOICE_LANGUAGE = os.environ.get("VOICE_LANGUAGE")
WHISPER_TO_GPT = bool(int(os.environ.get("WHISPER_TO_GPT")))
# TTS Settings
ENABLE_TTS = bool(int(os.environ.get("ENABLE_TTS")))
DEFAULT_VOICE_LANGUAGE = os.environ.get("DEFAULT_VOICE_LANGUAGE")
VOICE_LANGUAGE_LIST = os.environ.get("VOICE_LANGUAGE_LIST")
MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT"))
openai.api_key = os.environ.get("OPENAI_API_KEY")
@ -52,7 +56,7 @@ async def getUserData(chat_id):
"context": [],
"usage": {"chatgpt": 0, "whisper": 0, "dalle": 0},
"options": {
"whisper_to_chat": WHISPER_TO_CHAT,
"whisper_to_chat": WHISPER_TO_GPT,
"assistant_voice_chat": False,
"temperature": float(TEMPERATURE),
"max-context": MAX_USER_CONTEXT
@ -83,43 +87,34 @@ def generate_settings_markup(chat_id: str) -> InlineKeyboardMarkup:
]
return InlineKeyboardMarkup(inline_keyboard=keyboard)
def change_voice(engine, gender='male'):
for voice in engine.getProperty('voices'):
if VOICE_LANGUAGE in voice.languages[0].decode('utf-8') and gender == voice.gender:
engine.setProperty('voice', voice.id)
return True
async def text_to_voice(text: str) -> BytesIO:
with tempfile.NamedTemporaryFile(mode='wb', suffix='.ogg', delete=False) as ogg_file:
temp_filename = ogg_file.name
voice_done = False
async def text_to_voice(text: str, language: str = None) -> BytesIO:
binary_path = "/home/whoami/PROJECTS/openai-telegram-bot/piper/piper"
if language is None:
language = detect(text[0:80])
# If Google TTS is enabled, try to use it first
if ENABLE_GOOGLE_TTS:
try:
tts = gTTS(text, lang=VOICE_LANGUAGE)
tts.save(temp_filename)
voice_done = True
except Exception as e:
print("Google TTS failed, falling back to pyttsx3: --> ", e)
# If Google TTS is disabled or failed, use pyttsx3
if not voice_done:
engine = pyttsx3.init()
change_voice(engine)
engine.setProperty('rate', 160)
engine.save_to_file(text, temp_filename)
engine.runAndWait()
engine.stop()
# Add a small delay before reading the file
await asyncio.sleep(1)
model_path = f"/home/whoami/PROJECTS/openai-telegram-bot/piper/voices/{language}.onnx"
# Generate a unique temporary filename with '.ogg' extension
with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tmp:
tmp_filename = tmp.name
text = text.replace('"', "")
# Make the text be in a single line
text = text.replace("\n", " ")
# Construct the command to execute the binary
cmd = f"echo '{text}' | {binary_path} --model {model_path} --output_file {tmp_filename}"
# Run the binary and wait for it to finish
subprocess.run(cmd, shell=True, check=True)
# Open the file in binary mode and read its content into BytesIO object
with open(tmp_filename, 'rb') as file:
bytes_io = BytesIO(file.read())
with open(temp_filename, "rb") as audio_file:
voice_data = BytesIO(audio_file.read())
os.remove(temp_filename)
voice_data.seek(0)
return voice_data
# Delete the temporary file
os.remove(tmp_filename)
# Return the BytesIO object
return bytes_io
def restricted(func):
@ -364,7 +359,7 @@ if __name__ == '__main__':
print(f"Allowed users: {ALLOWED_USERS}")
print(f"System prompt: {SYSTEM_PROMPT}")
print(f"Google TTS: {ENABLE_GOOGLE_TTS}")
print(f"TTS: {ENABLE_TTS}")
# Register message handler and callback query handler for settings
dp.register_message_handler(settings, commands=['settings'])

Wyświetl plik

@ -0,0 +1,81 @@
#!/bin/bash
# Install the piper text-to-speech binary and download the voices listed in
# VOICE_LANGUAGE_LIST (comma separated language codes).
# ENABLE_TTS and VOICE_LANGUAGE_LIST are expected to come from the .env file
# in the current working directory. Nothing is installed unless ENABLE_TTS=1.
source .env

PIPER_RELEASE_URL="https://github.com/rhasspy/piper/releases/download/v0.0.2"

# Download one voice archive from the piper release and rename its model
# files to <lang>.onnx / <lang>.onnx.json in the current directory.
#   $1: language code used for the final file names (e.g. "en")
#   $2: voice name as used in the release archive (e.g. "en-us-ryan-high")
#   $3: language name shown in the progress message (e.g. "english")
# Returns non-zero, leaving nothing behind, when the download fails.
download_voice() {
    local lang="$1" voice="$2" label="$3"
    local archive="voice-${voice}.tar.gz"

    echo "Downloading ${label} voice..."
    if ! wget -q "${PIPER_RELEASE_URL}/${archive}"; then
        # Keep going with the remaining languages; one missing voice should
        # not prevent the others from being installed.
        echo "Failed to download ${archive}, skipping ${label} voice." >&2
        rm -f "${archive}"
        return 1
    fi
    tar -xf "${archive}"
    mv "${voice}.onnx" "${lang}.onnx"
    mv "${voice}.onnx.json" "${lang}.onnx.json"
    rm -rf "${archive}"
    echo "Done"
}

if [ "$ENABLE_TTS" = 1 ]; then
    echo "Installing piper for text to voice conversion..."
    echo "Downloading piper v0.0.2.."
    if ! wget -q "${PIPER_RELEASE_URL}/piper_amd64.tar.gz"; then
        # Without the archive every following step would fail and the script
        # would still report success, so stop here with a clear message.
        echo "Failed to download piper, text to voice will not be available." >&2
        rm -f piper_amd64.tar.gz
        exit 1
    fi
    echo "Extracting piper"
    # NOTE(review): the archive presumably unpacks into ./piper/ - confirm.
    tar -xf piper_amd64.tar.gz
    echo "Installing piper"
    rm -rf piper_amd64
    rm piper_amd64.tar.gz
    chmod -R 777 ./piper/
    # -p: the directory already exists when this script runs a second time.
    mkdir -p piper/voices
    echo "Downloading tts voices from VOICE_LANGUAGE_LIST..."
    echo "This can take a while..."
    # Split the comma separated list into words; unknown codes are ignored.
    for lang in ${VOICE_LANGUAGE_LIST//,/ }; do
        case "$lang" in
            en) download_voice en en-us-ryan-high english ;;
            es) download_voice es es-mls_10246-low spanish ;;
            fr) download_voice fr fr-siwis-medium french ;;
            it) download_voice it it-riccardo_fasol-x-low italian ;;
            pt) download_voice pt pt-br-edresson-low portuguese ;;
            ca) download_voice ca ca-upc_ona-x-low catalan ;;
        esac
    done
    echo "Moving voices to piper/voices/"
    mv *.onnx* piper/voices/
    echo "Done. Piper installed!"
else
    echo "TTS Disabled. No work to do..."
fi

Wyświetl plik

@ -1,6 +1,5 @@
aiogram==2.25.1
gTTS==2.3.1
langdetect==1.0.9
openai==0.27.2
pydub==0.25.1
python-dotenv==1.0.0
pyttsx3==2.90

20
utils.py 100644
Wyświetl plik

@ -0,0 +1,20 @@
import subprocess
import tempfile
import os
def text_to_speech(text: str) -> str:
    """Synthesize *text* with the piper binary and return the audio file path.

    The text is written to piper's standard input and piper is asked to write
    its output to a freshly created temporary file. The caller is responsible
    for deleting that file once it has been consumed.

    Args:
        text: The text to speak. It is handed to piper verbatim and is never
            interpreted by a shell, so quotes and shell metacharacters are
            safe.

    Returns:
        The path of the temporary file passed to piper as ``--output_file``.

    Raises:
        subprocess.CalledProcessError: If piper cannot be started or exits
            with a non-zero status. The temporary file is removed before the
            exception propagates.
    """
    binary_path = "./piper"
    model_path = "blizzard_lessac-medium.onnx"

    # Reserve a unique output path; delete=False keeps the file on disk after
    # the handle is closed so that piper can write to it.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp_filename = tmp.name

    # Argument list instead of a shell string: nothing in `text` can be
    # executed as a command, and a single quote no longer breaks the call.
    cmd = [binary_path, "--model", model_path, "--output_file", tmp_filename]
    try:
        # The trailing newline mirrors what `echo` used to send.
        subprocess.run(cmd, input=f"{text}\n".encode("utf-8"), check=True)
    except OSError as err:
        os.remove(tmp_filename)
        # Keep the exception type callers saw when a shell reported a missing
        # (127) or non-executable (126) binary.
        returncode = 127 if isinstance(err, FileNotFoundError) else 126
        raise subprocess.CalledProcessError(returncode, cmd) from err
    except subprocess.CalledProcessError:
        # Do not leave an empty or partial output file behind on failure.
        os.remove(tmp_filename)
        raise

    return tmp_filename