From 92fc09617ad2f8bbfa19598be4451cb859b5a7f5 Mon Sep 17 00:00:00 2001 From: pluja Date: Thu, 20 Apr 2023 11:24:32 +0200 Subject: [PATCH] Fix large voices, add voice language and readme --- README.md | 18 +++++++++++------- example.env | 1 + main.py | 33 +++++++++++++++++---------------- 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index ebd59e4..1705fd4 100644 --- a/README.md +++ b/README.md @@ -48,13 +48,15 @@ Self hosting this chatbot is pretty easy. You just need to follow this steps: 1. Clone this repo. 2. Rename the `example.env` file to `.env`. 3. Edit the environment variables from the `.env` file: - 1. Set your OPENAI_TOKEN. - 2. Set your BOT_TOKEN. - 3. Set your ALLOWED_USERS (comma separated user ids). Set it to `*` to allow all users. - 4. Set the SYSTEM_PROMPT for ChatGPT. This is always instructed to ChatGPT as the system. - 5. Optional: Edit the MAX_CONTEXT. This variable sets the number of messages that will be sent to ChatGPT API as context for the conversation. - 6. WHISPER_TO_CHAT allows you to choose wether Whisper transcripts should be instructed to ChatGPT or not. - 6. ENABLE_GOOGLE_TTS the TTS service will be provided by GoogleTTS, producing more natural voices. + - Set your OPENAI_TOKEN. + - Set your BOT_TOKEN. + - Set your ALLOWED_USERS (comma separated user ids). Set it to `*` to allow all users. + - Set the SYSTEM_PROMPT for ChatGPT. This is always instructed to ChatGPT as the system. + - Optional: Edit the MAX_CONTEXT. This variable sets the number of messages that will be sent to ChatGPT API as context for the conversation. + - WHISPER_TO_CHAT allows you to choose whether Whisper transcripts should be instructed to ChatGPT or not. + - You can also configure this using `/settings` in chat. + - ENABLE_GOOGLE_TTS the TTS service will be provided by GoogleTTS, producing more natural voices. If disabled, it falls back to local voice generation using Espeak. 
+ - VOICE_LANGUAGE language code for the default voice accent. 4. Build and start the bot: `docker compose up --build -d`. 5. Enjoy! @@ -70,4 +72,6 @@ Self hosting this chatbot is pretty easy. You just need to follow this steps: - Sending a voice message to the bot, it will transcribe it to text using Whisper. +- Using `/settings` you can configure a few settings. + - `/info` command allows you to see your usage statistics. \ No newline at end of file diff --git a/example.env b/example.env index ed3c3db..8dd7f99 100644 --- a/example.env +++ b/example.env @@ -9,6 +9,7 @@ CHATGPT_TEMPERATURE=1.0 WHISPER_TO_CHAT=1 # Use Google TTS for speech to text ENABLE_GOOGLE_TTS=0 +VOICE_LANGUAGE=en # en, es, fr, de, it, pt, ru, ja, ko BOT_TOKEN=your-telegram-bot-token BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs diff --git a/main.py b/main.py index 874af28..b9c0282 100644 --- a/main.py +++ b/main.py @@ -41,6 +41,7 @@ TEMPERATURE = os.environ.get("CHATGPT_TEMPERATURE") MODEL = os.environ.get("OPENAI_MODEL") WHISPER_TO_CHAT = bool(int(os.environ.get("WHISPER_TO_CHAT"))) ENABLE_GOOGLE_TTS = bool(int(os.environ.get("ENABLE_GOOGLE_TTS"))) +VOICE_LANGUAGE = os.environ.get("VOICE_LANGUAGE") MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT")) openai.api_key = os.environ.get("OPENAI_API_KEY") @@ -82,6 +83,12 @@ def generate_settings_markup(chat_id: str) -> InlineKeyboardMarkup: ] return InlineKeyboardMarkup(inline_keyboard=keyboard) +def change_voice(engine, gender='male'): + for voice in engine.getProperty('voices'): + if VOICE_LANGUAGE in voice.languages[0].decode('utf-8') and gender == voice.gender: + engine.setProperty('voice', voice.id) + return True + async def text_to_voice(text: str) -> BytesIO: with tempfile.NamedTemporaryFile(mode='wb', suffix='.ogg', delete=False) as ogg_file: temp_filename = ogg_file.name @@ -90,7 +97,7 @@ async def text_to_voice(text: str) -> BytesIO: # If Google TTS is enabled, try to use it first if 
ENABLE_GOOGLE_TTS: try: - tts = gTTS(text) + tts = gTTS(text, lang=VOICE_LANGUAGE) tts.save(temp_filename) voice_done = True except Exception as e: @@ -99,24 +106,16 @@ async def text_to_voice(text: str) -> BytesIO: # If Google TTS is disabled or failed, use pyttsx3 if not voice_done: engine = pyttsx3.init() - engine.setProperty('rate', 140) + change_voice(engine) + engine.setProperty('rate', 160) engine.save_to_file(text, temp_filename) engine.runAndWait() - await asyncio.sleep(0.5) # Add a small delay before reading the file + engine.stop() + # Add a small delay before reading the file + await asyncio.sleep(1) with open(temp_filename, "rb") as audio_file: - try: # Try to read the file - voice_data = BytesIO(audio_file.read()) - except Exception as e: - print(e) - # If reading the file fails, wait 0.6 seconds and try again - await asyncio.sleep(0.6) # Add a small delay before reading the file - try: - voice_data = BytesIO(audio_file.read()) - except Exception: - # If reading the file fails again, return None - os.remove(temp_filename) - return + voice_data = BytesIO(audio_file.read()) os.remove(temp_filename) voice_data.seek(0) @@ -359,7 +358,8 @@ if __name__ == '__main__': try: ALLOWED_USERS = os.environ.get("BOT_ALLOWED_USERS").split(",") - except (Exception): + except Exception as e: + print(e) ALLOWED_USERS = ALLOWED_USERS print(f"Allowed users: {ALLOWED_USERS}") @@ -369,4 +369,5 @@ if __name__ == '__main__': # Register message handler and callback query handler for settings dp.register_message_handler(settings, commands=['settings']) dp.register_callback_query_handler(settings_callback, lambda c: c.data.startswith('setting_')) + executor.start_polling(dp, skip_updates=True) \ No newline at end of file