From 92fc09617ad2f8bbfa19598be4451cb859b5a7f5 Mon Sep 17 00:00:00 2001
From: pluja <codeberg@r3d.red>
Date: Thu, 20 Apr 2023 11:24:32 +0200
Subject: [PATCH] Fix large voices, add voice language and readme

---
 README.md   | 18 +++++++++++-------
 example.env |  1 +
 main.py     | 33 +++++++++++++++++----------------
 3 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index ebd59e4..1705fd4 100644
--- a/README.md
+++ b/README.md
@@ -48,13 +48,15 @@ Self hosting this chatbot is pretty easy. You just need to follow this steps:
    1. Clone this repo.
    2. Rename the `example.env` file to `.env`.
    3. Edit the environment variables from the `.env` file:
-      1. Set your OPENAI_TOKEN.
-      2. Set your BOT_TOKEN.
-      3. Set your ALLOWED_USERS (comma separated user ids). Set it to `*` to allow all users.
-      4. Set the SYSTEM_PROMPT for ChatGPT. This is always instructed to ChatGPT as the system.
-      5. Optional: Edit the MAX_CONTEXT. This variable sets the number of messages that will be sent to ChatGPT API as context for the conversation.
-      6. WHISPER_TO_CHAT allows you to choose wether Whisper transcripts should be instructed to ChatGPT or not.
-      6. ENABLE_GOOGLE_TTS the TTS service will be provided by GoogleTTS, producing more natural voices.
+      - Set your OPENAI_TOKEN.
+      - Set your BOT_TOKEN.
+      - Set your ALLOWED_USERS (comma separated user ids). Set it to `*` to allow all users.
+      - Set the SYSTEM_PROMPT for ChatGPT. This is always instructed to ChatGPT as the system.
+      - Optional: Edit the MAX_CONTEXT. This variable sets the number of messages that will be sent to ChatGPT API as context for the conversation.
+      - WHISPER_TO_CHAT allows you to choose whether Whisper transcripts should be instructed to ChatGPT or not.
+         - You can also configure this using `/settings` in chat.
+      - ENABLE_GOOGLE_TTS: when enabled, the TTS service is provided by Google TTS, producing more natural voices. If disabled (or if Google TTS fails), it falls back to local voice generation using Espeak.
+      - VOICE_LANGUAGE sets the language code (e.g. `en`, `es`, `fr`) used for the default voice.
    4. Build and start the bot: `docker compose up --build -d`.
    
 5. Enjoy!
@@ -70,4 +72,6 @@ Self hosting this chatbot is pretty easy. You just need to follow this steps:
 
 - Sending a voice message to the bot, it will transcribe it to text using Whisper.
 
+- Using `/settings` you can configure a few settings.
+
 - `/info` command allows you to see your usage statistics.
\ No newline at end of file
diff --git a/example.env b/example.env
index ed3c3db..8dd7f99 100644
--- a/example.env
+++ b/example.env
@@ -9,6 +9,7 @@ CHATGPT_TEMPERATURE=1.0
 WHISPER_TO_CHAT=1
 # Use Google TTS for speech to text
 ENABLE_GOOGLE_TTS=0
+VOICE_LANGUAGE=en # en, es, fr, de, it, pt, ru, ja, ko
 
 BOT_TOKEN=your-telegram-bot-token
 BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs
diff --git a/main.py b/main.py
index 874af28..b9c0282 100644
--- a/main.py
+++ b/main.py
@@ -41,6 +41,7 @@ TEMPERATURE = os.environ.get("CHATGPT_TEMPERATURE")
 MODEL = os.environ.get("OPENAI_MODEL")
 WHISPER_TO_CHAT = bool(int(os.environ.get("WHISPER_TO_CHAT")))
 ENABLE_GOOGLE_TTS = bool(int(os.environ.get("ENABLE_GOOGLE_TTS")))
+VOICE_LANGUAGE = os.environ.get("VOICE_LANGUAGE")
 MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT"))
 openai.api_key = os.environ.get("OPENAI_API_KEY")
 
@@ -82,6 +83,12 @@ def generate_settings_markup(chat_id: str) -> InlineKeyboardMarkup:
     ]
     return InlineKeyboardMarkup(inline_keyboard=keyboard)
 
+def change_voice(engine, gender='male'):
+    for voice in engine.getProperty('voices'):
+        if VOICE_LANGUAGE in voice.languages[0].decode('utf-8') and gender == voice.gender:
+            engine.setProperty('voice', voice.id)
+            return True
+
 async def text_to_voice(text: str) -> BytesIO:
     with tempfile.NamedTemporaryFile(mode='wb', suffix='.ogg', delete=False) as ogg_file:
         temp_filename = ogg_file.name
@@ -90,7 +97,7 @@ async def text_to_voice(text: str) -> BytesIO:
         # If Google TTS is enabled, try to use it first
         if ENABLE_GOOGLE_TTS:
             try:
-                tts = gTTS(text)
+                tts = gTTS(text, lang=VOICE_LANGUAGE)
                 tts.save(temp_filename)
                 voice_done = True
             except Exception as e:
@@ -99,24 +106,16 @@ async def text_to_voice(text: str) -> BytesIO:
         # If Google TTS is disabled or failed, use pyttsx3
         if not voice_done:
             engine = pyttsx3.init()
-            engine.setProperty('rate', 140)
+            change_voice(engine)
+            engine.setProperty('rate', 160)
             engine.save_to_file(text, temp_filename)
             engine.runAndWait()
-            await asyncio.sleep(0.5)  # Add a small delay before reading the file
+            engine.stop()
+            # Add a small delay before reading the file
+            await asyncio.sleep(1)
 
     with open(temp_filename, "rb") as audio_file:
-        try: # Try to read the file
-            voice_data = BytesIO(audio_file.read())
-        except Exception as e:
-            print(e)
-            # If reading the file fails, wait 0.6 seconds and try again
-            await asyncio.sleep(0.6)  # Add a small delay before reading the file
-            try:
-                voice_data = BytesIO(audio_file.read())
-            except Exception:
-                # If reading the file fails again, return None
-                os.remove(temp_filename)
-                return
+        voice_data = BytesIO(audio_file.read())
 
     os.remove(temp_filename)
     voice_data.seek(0)
@@ -359,7 +358,8 @@ if __name__ == '__main__':
 
     try:
         ALLOWED_USERS = os.environ.get("BOT_ALLOWED_USERS").split(",")
-    except (Exception):
+    except Exception as e:
+        print(e)
         ALLOWED_USERS = ALLOWED_USERS
         
     print(f"Allowed users: {ALLOWED_USERS}")
@@ -369,4 +369,5 @@ if __name__ == '__main__':
     # Register message handler and callback query handler for settings
     dp.register_message_handler(settings, commands=['settings'])
     dp.register_callback_query_handler(settings_callback, lambda c: c.data.startswith('setting_'))
+    
     executor.start_polling(dp, skip_updates=True)
\ No newline at end of file