kopia lustrzana https://codeberg.org/pluja/openai-telegram-bot
Use piper for realistic TTS, remove google tts
rodzic
92fc09617a
commit
f5947f03d9
|
@ -1,4 +1,11 @@
|
|||
venv/
|
||||
.env
|
||||
*.db
|
||||
__pycache__/
|
||||
__pycache__/
|
||||
db_data
|
||||
*.ogg
|
||||
piper/*.so*
|
||||
piper/piper
|
||||
piper/espeak*
|
||||
piper/voices
|
||||
MODEL_CARD
|
12
Dockerfile
12
Dockerfile
|
@ -1,8 +1,16 @@
|
|||
FROM python:3.10-slim
|
||||
|
||||
RUN apt update && apt install -y ffmpeg libespeak1
|
||||
# Set the voice language
|
||||
ARG VOICE_LANGUAGE=en
|
||||
|
||||
RUN apt update && apt install -y ffmpeg wget libespeak1
|
||||
WORKDIR /app
|
||||
|
||||
COPY ./entrypoint.sh /app
|
||||
RUN chmod +x /app/entrypoint.sh
|
||||
|
||||
COPY ./piper /app/piper
|
||||
|
||||
COPY ./main.py /app
|
||||
COPY ./database.py /app
|
||||
COPY ./requirements.txt /app
|
||||
|
@ -11,4 +19,4 @@ RUN mkdir db_data
|
|||
|
||||
RUN pip install --upgrade pip
|
||||
RUN pip install -r requirements.txt
|
||||
CMD [ "python3", "/app/main.py" ]
|
||||
ENTRYPOINT [ "/app/entrypoint.sh" ]
|
|
@ -10,7 +10,7 @@ A telegram bot to interact with OpenAI API. You can:
|
|||
- Voice chat with ChatGPT:
|
||||
- Send voice message.
|
||||
- Receive voice messages.
|
||||
- Use GoogleTTS or 100% local Espeak (more robotic).
|
||||
- Use 100% local Text-To-Speech with Language Recognition to give ChatGPT a voice in many languages!
|
||||
|
||||
Other features include:
|
||||
|
||||
|
@ -53,7 +53,7 @@ Self hosting this chatbot is pretty easy. You just need to follow this steps:
|
|||
- Set your ALLOWED_USERS (comma separated user ids). Set it to `*` to allow all users.
|
||||
- Set the SYSTEM_PROMPT for ChatGPT. This is always instructed to ChatGPT as the system.
|
||||
- Optional: Edit the MAX_CONTEXT. This variable sets the number of messages that will be sent to ChatGPT API as context for the conversation.
|
||||
- WHISPER_TO_CHAT allows you to choose whether Whisper transcripts should be sent to ChatGPT or not.
|
||||
- WHISPER_TO_GPT allows you to choose whether Whisper transcripts should be sent to ChatGPT or not.
|
||||
- You can also configure this using `/settings` in chat.
|
||||
- ENABLE_GOOGLE_TTS: when enabled, the TTS service is provided by GoogleTTS, producing more natural voices. If disabled, it falls back to local voice generation using Espeak.
|
||||
- VOICE_LANGUAGE: language code (e.g. `en`, `es`) for the default voice accent.
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
#!/bin/bash
#
# Container entrypoint: prepare the piper text-to-speech files, then start
# the Telegram bot.

echo "Installing piper for text to voice conversion..."
# The bot is still started when the piper setup fails; only make the failure
# visible in the container log instead of ignoring it silently.
if ! bash /app/piper/init-piper.sh; then
    echo "Warning: piper setup failed, voice replies may not work." >&2
fi

echo "Bot starting..."
# exec replaces this shell with the python process, so signals sent to the
# container (e.g. SIGTERM on `docker stop`) reach the bot directly instead of
# being held by an intermediate bash process.
exec python3 -u /app/main.py
|
11
example.env
11
example.env
|
@ -6,10 +6,13 @@ CHATGPT_MAX_USER_CONTEXT=5
|
|||
CHATGPT_TEMPERATURE=1.0
|
||||
|
||||
# Use Whisper transcript from voice message with ChatGPT
|
||||
WHISPER_TO_CHAT=1
|
||||
# Use Google TTS for text to speech
|
||||
ENABLE_GOOGLE_TTS=0
|
||||
VOICE_LANGUAGE=en # en, es, fr, de, it, pt, ru, ja, ko
|
||||
WHISPER_TO_GPT=1
|
||||
|
||||
# TTS Options
|
||||
ENABLE_TTS=1
|
||||
# If ENABLE_TTS=1, you can set the following options
|
||||
VOICE_LANGUAGE_LIST=en,es,fr,it,pt,ca
|
||||
DEFAULT_VOICE_LANGUAGE=en
|
||||
|
||||
BOT_TOKEN=your-telegram-bot-token
|
||||
BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs
|
||||
|
|
81
main.py
81
main.py
|
@ -1,19 +1,19 @@
|
|||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
from functools import wraps
|
||||
from io import BytesIO
|
||||
|
||||
import subprocess
|
||||
|
||||
import openai
|
||||
import pyttsx3
|
||||
from aiogram import Bot, Dispatcher, types
|
||||
from aiogram.contrib.middlewares.logging import LoggingMiddleware
|
||||
from aiogram.types import InlineKeyboardButton, InlineKeyboardMarkup, ParseMode
|
||||
from aiogram.utils import executor
|
||||
from dotenv import load_dotenv
|
||||
from gtts import gTTS
|
||||
from pydub import AudioSegment
|
||||
from langdetect import detect
|
||||
|
||||
import database
|
||||
|
||||
|
@ -39,9 +39,13 @@ ALLOWED_USERS = os.environ.get("BOT_ALLOWED_USERS").split(",")
|
|||
SYSTEM_PROMPT = os.environ.get("CHATGPT_SYSTEM_PROMPT")
|
||||
TEMPERATURE = os.environ.get("CHATGPT_TEMPERATURE")
|
||||
MODEL = os.environ.get("OPENAI_MODEL")
|
||||
WHISPER_TO_CHAT = bool(int(os.environ.get("WHISPER_TO_CHAT")))
|
||||
ENABLE_GOOGLE_TTS = bool(int(os.environ.get("ENABLE_GOOGLE_TTS")))
|
||||
VOICE_LANGUAGE = os.environ.get("VOICE_LANGUAGE")
|
||||
WHISPER_TO_GPT = bool(int(os.environ.get("WHISPER_TO_GPT")))
|
||||
|
||||
# TTS Settings
|
||||
ENABLE_TTS = bool(int(os.environ.get("ENABLE_TTS")))
|
||||
DEFAULT_VOICE_LANGUAGE = os.environ.get("DEFAULT_VOICE_LANGUAGE")
|
||||
VOICE_LANGUAGE_LIST = os.environ.get("VOICE_LANGUAGE_LIST")
|
||||
|
||||
MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT"))
|
||||
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
|
@ -52,7 +56,7 @@ async def getUserData(chat_id):
|
|||
"context": [],
|
||||
"usage": {"chatgpt": 0, "whisper": 0, "dalle": 0},
|
||||
"options": {
|
||||
"whisper_to_chat": WHISPER_TO_CHAT,
|
||||
"whisper_to_chat": WHISPER_TO_GPT,
|
||||
"assistant_voice_chat": False,
|
||||
"temperature": float(TEMPERATURE),
|
||||
"max-context": MAX_USER_CONTEXT
|
||||
|
@ -83,43 +87,34 @@ def generate_settings_markup(chat_id: str) -> InlineKeyboardMarkup:
|
|||
]
|
||||
return InlineKeyboardMarkup(inline_keyboard=keyboard)
|
||||
|
||||
def change_voice(engine, gender='male'):
    """Select the first engine voice matching VOICE_LANGUAGE and ``gender``.

    Args:
        engine: A pyttsx3 engine (as returned by ``pyttsx3.init()``).
        gender: Gender the voice must report to be selected.

    Returns:
        True when a matching voice was found and set on the engine,
        False otherwise (the engine then keeps its current voice).
    """
    for voice in engine.getProperty('voices'):
        languages = voice.languages
        if not languages:
            # Some voices report no language at all; indexing [0] would
            # raise IndexError for them.
            continue

        primary = languages[0]
        # The language entry may be bytes or str depending on the driver.
        if isinstance(primary, bytes):
            primary = primary.decode('utf-8')

        if VOICE_LANGUAGE in primary and gender == voice.gender:
            engine.setProperty('voice', voice.id)
            return True

    return False
|
||||
|
||||
async def text_to_voice(text: str, language: str = None) -> BytesIO:
    """Synthesize ``text`` into speech using the local piper binary.

    The text is written to piper's standard input and piper writes the audio
    to a temporary file, which is read back into memory and deleted.

    Args:
        text: Text to speak. Line breaks are replaced by spaces so the whole
            text is handed to piper as a single line.
        language: Language code selecting ``piper/voices/<language>.onnx``.
            When ``None`` it is detected from the first 80 characters of
            ``text``. If detection fails, or no voice model exists for the
            language, DEFAULT_VOICE_LANGUAGE is used instead.

    Returns:
        A BytesIO positioned at the start of the audio data piper produced.

    Raises:
        subprocess.CalledProcessError: If piper exits with a non-zero status.
        OSError: If the piper binary cannot be started or the output file
            cannot be read.
    """
    # langdetect is already a dependency of this module (``detect``).
    from langdetect import LangDetectException

    # Locate piper next to this file rather than through a machine-specific
    # absolute path, so the same code works in a local checkout and in the
    # container image.
    piper_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "piper")
    binary_path = os.path.join(piper_dir, "piper")
    voices_dir = os.path.join(piper_dir, "voices")

    if language is None:
        try:
            language = detect(text[0:80])
        except LangDetectException:
            # Raised e.g. for empty or symbol-only text.
            language = DEFAULT_VOICE_LANGUAGE

    model_path = os.path.join(voices_dir, f"{language}.onnx")
    if not os.path.isfile(model_path):
        # Only the configured voices are downloaded; fall back to the default
        # voice when the requested language has no model.
        model_path = os.path.join(voices_dir, f"{DEFAULT_VOICE_LANGUAGE}.onnx")

    # Make the text be in a single line
    single_line = text.replace("\r", " ").replace("\n", " ")

    # Reserve a unique temporary filename with '.ogg' extension for the output
    with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tmp:
        tmp_filename = tmp.name

    command = [binary_path, "--model", model_path, "--output_file", tmp_filename]
    try:
        # Pass the text on stdin without a shell: quotes or other shell
        # metacharacters in the text can neither break nor inject into the
        # command. The asyncio subprocess API keeps the event loop free to
        # serve other chats while piper is running.
        process = await asyncio.create_subprocess_exec(
            *command, stdin=asyncio.subprocess.PIPE
        )
        await process.communicate(input=(single_line + "\n").encode("utf-8"))
        if process.returncode != 0:
            raise subprocess.CalledProcessError(process.returncode, command)

        # Read the generated audio into memory
        with open(tmp_filename, "rb") as audio_file:
            return BytesIO(audio_file.read())
    finally:
        # Delete the temporary file, also when piper failed
        os.remove(tmp_filename)
|
||||
|
||||
|
||||
def restricted(func):
|
||||
|
@ -364,7 +359,7 @@ if __name__ == '__main__':
|
|||
|
||||
print(f"Allowed users: {ALLOWED_USERS}")
|
||||
print(f"System prompt: {SYSTEM_PROMPT}")
|
||||
print(f"Google TTS: {ENABLE_GOOGLE_TTS}")
|
||||
print(f"TTS: {ENABLE_TTS}")
|
||||
|
||||
# Register message handler and callback query handler for settings
|
||||
dp.register_message_handler(settings, commands=['settings'])
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
#!/bin/bash
#
# Download the piper text-to-speech binary and the voices listed in
# VOICE_LANGUAGE_LIST (comma separated language codes) into ./piper/.
#
# Paths are relative to the current working directory, so run this script
# from the project root. Nothing is done unless ENABLE_TTS is 1. The script
# can be re-run safely: files that are already installed are not downloaded
# again.

# The .env file is optional: when it is missing the settings are expected to
# be present in the environment already.
if [ -f .env ]; then
    source .env
fi

PIPER_RELEASE_URL="https://github.com/rhasspy/piper/releases/download/v0.0.2"

# Print the name of the piper voice used for language code $1.
# Returns 1 when no voice is known for that language.
voice_for_lang() {
    case "$1" in
        en) echo "en-us-ryan-high" ;;
        es) echo "es-mls_10246-low" ;;
        fr) echo "fr-siwis-medium" ;;
        it) echo "it-riccardo_fasol-x-low" ;;
        pt) echo "pt-br-edresson-low" ;;
        ca) echo "ca-upc_ona-x-low" ;;
        *) return 1 ;;
    esac
}

# Download the voice for language code $1 and install it as
# piper/voices/<lang>.onnx and piper/voices/<lang>.onnx.json.
download_voice() {
    local lang="$1"
    local voice archive status

    if ! voice="$(voice_for_lang "$lang")"; then
        echo "No voice available for language '$lang', skipping."
        return 0
    fi

    if [ -f "piper/voices/$lang.onnx" ] && [ -f "piper/voices/$lang.onnx.json" ]; then
        echo "Voice for '$lang' already installed."
        return 0
    fi

    archive="voice-$voice.tar.gz"
    echo "Downloading '$lang' voice ($voice)..."
    # -O keeps the file name fixed even when a stale archive is lying around.
    if ! wget -q -O "$archive" "$PIPER_RELEASE_URL/$archive"; then
        echo "Failed to download $archive" >&2
        rm -f "$archive"
        return 1
    fi

    tar -xf "$archive" \
        && mv "$voice.onnx" "piper/voices/$lang.onnx" \
        && mv "$voice.onnx.json" "piper/voices/$lang.onnx.json"
    status=$?
    rm -f "$archive"

    if [ "$status" -ne 0 ]; then
        echo "Failed to install voice for '$lang'" >&2
        return 1
    fi
    echo "Done"
}

if [ "${ENABLE_TTS:-0}" = 1 ]; then
    echo "Installing piper for text to voice conversion..."

    if [ -x piper/piper ]; then
        echo "piper is already installed, skipping download."
    else
        echo "Downloading piper v0.0.2.."
        if ! wget -q -O piper_amd64.tar.gz "$PIPER_RELEASE_URL/piper_amd64.tar.gz"; then
            echo "Failed to download piper" >&2
            rm -f piper_amd64.tar.gz
            exit 1
        fi

        echo "Extracting piper"
        # The archive unpacks into ./piper/
        if ! tar -xf piper_amd64.tar.gz; then
            echo "Failed to extract piper" >&2
            rm -f piper_amd64.tar.gz
            exit 1
        fi
        rm -f piper_amd64.tar.gz

        echo "Installing piper"
        # Readable (and, where already executable, executable) by everyone,
        # writable only by the owner; world-writable binaries are not needed.
        chmod -R u+rwX,go+rX ./piper/
    fi

    # -p: do not fail when the directory is left over from a previous run
    mkdir -p piper/voices

    echo "Downloading tts voices from VOICE_LANGUAGE_LIST..."
    echo "This can take a while..."
    # Split the comma separated list into words
    for lang in ${VOICE_LANGUAGE_LIST//,/ }; do
        download_voice "$lang"
    done

    echo "Done. Piper installed!"
else
    echo "TTS Disabled. No work to do..."
fi
|
|
@ -1,6 +1,5 @@
|
|||
aiogram==2.25.1
|
||||
gTTS==2.3.1
|
||||
langdetect==1.0.9
|
||||
openai==0.27.2
|
||||
pydub==0.25.1
|
||||
python-dotenv==1.0.0
|
||||
pyttsx3==2.90
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
import subprocess
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
def text_to_speech(text: str) -> str:
    """Synthesize ``text`` with the piper binary and return the output path.

    The text is passed to ``./piper`` on standard input and the audio is
    written to a freshly created temporary file.

    Args:
        text: The text to speak.

    Returns:
        Path of the temporary file holding the generated audio. The caller
        owns the file and is responsible for deleting it.

    Raises:
        subprocess.CalledProcessError: If piper exits with a non-zero status.
        OSError: If the piper binary cannot be started.
    """
    binary_path = "./piper"
    model_path = "blizzard_lessac-medium.onnx"

    # Generate a unique temporary filename
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp_filename = tmp.name

    # Argument list instead of a shell string: the text goes to piper's stdin
    # untouched, so quotes or shell metacharacters in it can neither break
    # nor inject into the command.
    cmd = [binary_path, "--model", model_path, "--output_file", tmp_filename]

    try:
        # Run the binary and wait for it to finish
        subprocess.run(cmd, input=text + "\n", encoding="utf-8", check=True)
    except (OSError, subprocess.CalledProcessError):
        # The caller never learns the filename on failure, so clean up here
        # instead of leaking the temporary file.
        os.remove(tmp_filename)
        raise

    # Return the temporary filename
    return tmp_filename
|
Ładowanie…
Reference in New Issue