kopia lustrzana https://codeberg.org/pluja/openai-telegram-bot
Porównaj commity
3 Commity
ca760f0d33
...
92fc09617a
Autor | SHA1 | Data |
---|---|---|
pluja | 92fc09617a | |
pluja | e193bf1989 | |
pluja | 8871deb5ac |
18
README.md
18
README.md
|
@ -48,13 +48,15 @@ Self hosting this chatbot is pretty easy. You just need to follow these steps:
|
|||
1. Clone this repo.
|
||||
2. Rename the `example.env` file to `.env`.
|
||||
3. Edit the environment variables from the `.env` file:
|
||||
1. Set your OPENAI_TOKEN.
|
||||
2. Set your BOT_TOKEN.
|
||||
3. Set your ALLOWED_USERS (comma separated user ids). Set it to `*` to allow all users.
|
||||
4. Set the SYSTEM_PROMPT for ChatGPT. This is always instructed to ChatGPT as the system.
|
||||
5. Optional: Edit the MAX_CONTEXT. This variable sets the number of messages that will be sent to ChatGPT API as context for the conversation.
|
||||
6. WHISPER_TO_CHAT allows you to choose whether Whisper transcripts should be instructed to ChatGPT or not.
|
||||
7. ENABLE_GOOGLE_TTS: if enabled, the TTS service will be provided by Google TTS, producing more natural voices.
|
||||
- Set your OPENAI_TOKEN.
|
||||
- Set your BOT_TOKEN.
|
||||
- Set your ALLOWED_USERS (comma separated user ids). Set it to `*` to allow all users.
|
||||
- Set the SYSTEM_PROMPT for ChatGPT. This is always instructed to ChatGPT as the system.
|
||||
- Optional: Edit the MAX_CONTEXT. This variable sets the number of messages that will be sent to ChatGPT API as context for the conversation.
|
||||
- WHISPER_TO_CHAT allows you to choose whether Whisper transcripts should be instructed to ChatGPT or not.
|
||||
- You can also configure this using `/settings` in chat.
|
||||
- ENABLE_GOOGLE_TTS: if enabled, the TTS service will be provided by Google TTS, producing more natural voices. If disabled, it falls back to local voice generation using Espeak.
|
||||
- VOICE_LANGUAGE country code for the default voice accent.
|
||||
4. Build and start the bot: `docker compose up --build -d`.
|
||||
|
||||
5. Enjoy!
|
||||
|
@ -70,4 +72,6 @@ Self hosting this chatbot is pretty easy. You just need to follow this steps:
|
|||
|
||||
- Sending a voice message to the bot, it will transcribe it to text using Whisper.
|
||||
|
||||
- Using `/settings` you can configure a few settings.
|
||||
|
||||
- `/info` command allows you to see your usage statistics.
|
|
@ -9,6 +9,7 @@ CHATGPT_TEMPERATURE=1.0
|
|||
WHISPER_TO_CHAT=1
|
||||
# Use Google TTS for text to speech
|
||||
ENABLE_GOOGLE_TTS=0
|
||||
VOICE_LANGUAGE=en # en, es, fr, de, it, pt, ru, ja, ko
|
||||
|
||||
BOT_TOKEN=your-telegram-bot-token
|
||||
BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs
|
||||
|
|
66
main.py
66
main.py
|
@ -1,3 +1,4 @@
|
|||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
|
@ -5,14 +6,13 @@ from functools import wraps
|
|||
from io import BytesIO
|
||||
|
||||
import openai
|
||||
import pyttsx3
|
||||
from aiogram import Bot, Dispatcher, types
|
||||
from aiogram.contrib.middlewares.logging import LoggingMiddleware
|
||||
from aiogram.types import InlineKeyboardButton, InlineKeyboardMarkup, ParseMode
|
||||
from aiogram.types.input_file import InputFile
|
||||
from aiogram.utils import executor
|
||||
from dotenv import load_dotenv
|
||||
from gtts import gTTS
|
||||
import pyttsx3
|
||||
from pydub import AudioSegment
|
||||
|
||||
import database
|
||||
|
@ -41,6 +41,7 @@ TEMPERATURE = os.environ.get("CHATGPT_TEMPERATURE")
|
|||
MODEL = os.environ.get("OPENAI_MODEL")
|
||||
WHISPER_TO_CHAT = bool(int(os.environ.get("WHISPER_TO_CHAT")))
|
||||
ENABLE_GOOGLE_TTS = bool(int(os.environ.get("ENABLE_GOOGLE_TTS")))
|
||||
VOICE_LANGUAGE = os.environ.get("VOICE_LANGUAGE")
|
||||
MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT"))
|
||||
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
|
@ -82,34 +83,42 @@ def generate_settings_markup(chat_id: str) -> InlineKeyboardMarkup:
|
|||
]
|
||||
return InlineKeyboardMarkup(inline_keyboard=keyboard)
|
||||
|
||||
async def text_to_voice(text: str) -> BytesIO:
|
||||
if ENABLE_GOOGLE_TTS:
|
||||
tts = gTTS(text)
|
||||
with tempfile.NamedTemporaryFile(mode='wb', suffix='.mp3', delete=False) as mp3_file:
|
||||
temp_mp3_filename = mp3_file.name
|
||||
tts.save(temp_mp3_filename)
|
||||
else:
|
||||
engine = pyttsx3.init() # PyTTSX3 Engine
|
||||
engine.setProperty('rate', 150)
|
||||
with tempfile.NamedTemporaryFile(mode='wb', suffix='.mp3', delete=False) as mp3_file:
|
||||
temp_mp3_filename = mp3_file.name
|
||||
engine.save_to_file(text, temp_mp3_filename)
|
||||
engine.runAndWait()
|
||||
|
||||
mp3_audio = AudioSegment.from_file(temp_mp3_filename, format="mp3")
|
||||
with tempfile.NamedTemporaryFile(mode="wb", suffix=".ogg", delete=False) as ogg_file:
|
||||
temp_ogg_filename = ogg_file.name
|
||||
mp3_audio.export(temp_ogg_filename, format="ogg")
|
||||
def change_voice(engine, gender='male'):
    """Select a pyttsx3 voice matching VOICE_LANGUAGE and the given gender.

    Iterates the engine's installed voices and activates the first one whose
    primary language contains VOICE_LANGUAGE and whose gender matches.

    Args:
        engine: an initialized pyttsx3 engine.
        gender: desired voice gender ('male' by default).

    Returns:
        True if a matching voice was found and set, False otherwise.
    """
    for voice in engine.getProperty('voices'):
        # voice.languages may be empty, and its entries are bytes on some
        # platforms and str on others -- normalize before comparing to avoid
        # IndexError / AttributeError (both crash the original code).
        if not voice.languages:
            continue
        lang = voice.languages[0]
        if isinstance(lang, bytes):
            lang = lang.decode('utf-8')
        if VOICE_LANGUAGE in lang and gender == voice.gender:
            engine.setProperty('voice', voice.id)
            return True
    return False
|
||||
|
||||
with open(temp_ogg_filename, "rb") as audio_file:
|
||||
_ = InputFile(audio_file)
|
||||
async def text_to_voice(text: str) -> BytesIO:
    """Synthesize *text* into an OGG voice message and return it as BytesIO.

    When ENABLE_GOOGLE_TTS is set, Google TTS is tried first; on any failure
    (or when it is disabled) the local pyttsx3 engine is used as a fallback.

    Args:
        text: the message to synthesize.

    Returns:
        A BytesIO positioned at offset 0 containing the rendered audio.
    """
    # Create a named temp file and close it immediately so the TTS engines
    # can write to it by path (required on Windows, harmless elsewhere).
    with tempfile.NamedTemporaryFile(mode='wb', suffix='.ogg', delete=False) as ogg_file:
        temp_filename = ogg_file.name

    try:
        voice_done = False

        # If Google TTS is enabled, try to use it first
        if ENABLE_GOOGLE_TTS:
            try:
                tts = gTTS(text, lang=VOICE_LANGUAGE)
                tts.save(temp_filename)
                voice_done = True
            except Exception as e:
                print("Google TTS failed, falling back to pyttsx3: --> ", e)

        # If Google TTS is disabled or failed, use pyttsx3
        if not voice_done:
            engine = pyttsx3.init()
            change_voice(engine)
            engine.setProperty('rate', 160)
            engine.save_to_file(text, temp_filename)
            engine.runAndWait()
            engine.stop()
            # Give the engine a moment to flush the file before reading it
            await asyncio.sleep(1)

        with open(temp_filename, "rb") as audio_file:
            voice_data = BytesIO(audio_file.read())
    finally:
        # Always delete the temp file, even if synthesis or reading raised
        # (the original leaked the file on any exception). NOTE(review):
        # stale os.remove(temp_mp3_filename)/os.remove(temp_ogg_filename)
        # lines from the previous implementation are dropped here -- those
        # names no longer exist and would raise NameError.
        os.remove(temp_filename)

    voice_data.seek(0)
    return voice_data
|
||||
|
||||
|
||||
|
@ -342,7 +351,6 @@ async def chat(message: types.Message):
|
|||
if user_data["options"]["assistant_voice_chat"]:
|
||||
await bot.send_chat_action(chat_id, action=types.ChatActions.TYPING)
|
||||
voice_data = await text_to_voice(assistant_message)
|
||||
|
||||
await message.reply_voice(voice_data)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -350,7 +358,8 @@ if __name__ == '__main__':
|
|||
|
||||
try:
|
||||
ALLOWED_USERS = os.environ.get("BOT_ALLOWED_USERS").split(",")
|
||||
except (Exception):
|
||||
except Exception as e:
|
||||
print(e)
|
||||
ALLOWED_USERS = ALLOWED_USERS
|
||||
|
||||
print(f"Allowed users: {ALLOWED_USERS}")
|
||||
|
@ -360,4 +369,5 @@ if __name__ == '__main__':
|
|||
# Register message handler and callback query handler for settings
|
||||
dp.register_message_handler(settings, commands=['settings'])
|
||||
dp.register_callback_query_handler(settings_callback, lambda c: c.data.startswith('setting_'))
|
||||
|
||||
executor.start_polling(dp, skip_updates=True)
|
Ładowanie…
Reference in New Issue