Fix large voices, add voice language and readme

main
pluja 2023-04-20 11:24:32 +02:00
rodzic e193bf1989
commit 92fc09617a
3 zmienionych plików z 29 dodań i 23 usunięć

Wyświetl plik

@ -48,13 +48,15 @@ Self hosting this chatbot is pretty easy. You just need to follow this steps:
1. Clone this repo.
2. Rename the `example.env` file to `.env`.
3. Edit the environment variables from the `.env` file:
1. Set your OPENAI_TOKEN.
2. Set your BOT_TOKEN.
3. Set your ALLOWED_USERS (comma separated user ids). Set it to `*` to allow all users.
4. Set the SYSTEM_PROMPT for ChatGPT. This is always instructed to ChatGPT as the system.
5. Optional: Edit the MAX_CONTEXT. This variable sets the number of messages that will be sent to ChatGPT API as context for the conversation.
6. WHISPER_TO_CHAT allows you to choose wether Whisper transcripts should be instructed to ChatGPT or not.
6. ENABLE_GOOGLE_TTS the TTS service will be provided by GoogleTTS, producing more natural voices.
- Set your OPENAI_TOKEN.
- Set your BOT_TOKEN.
- Set your ALLOWED_USERS (comma separated user ids). Set it to `*` to allow all users.
- Set the SYSTEM_PROMPT for ChatGPT. This is always instructed to ChatGPT as the system.
- Optional: Edit the MAX_CONTEXT. This variable sets the number of messages that will be sent to ChatGPT API as context for the conversation.
- WHISPER_TO_CHAT allows you to choose whether Whisper transcripts should be sent to ChatGPT or not.
- You can also configure this using `/settings` in chat.
- ENABLE_GOOGLE_TTS: when enabled, the TTS service is provided by GoogleTTS, producing more natural voices. If disabled, it falls back to local voice generation using Espeak.
- VOICE_LANGUAGE sets the language code (e.g. `en`, `es`) for the default voice accent.
4. Build and start the bot: `docker compose up --build -d`.
5. Enjoy!
@ -70,4 +72,6 @@ Self hosting this chatbot is pretty easy. You just need to follow this steps:
- Sending a voice message to the bot, it will transcribe it to text using Whisper.
- Using `/settings` you can configure a few settings.
- `/info` command allows you to see your usage statistics.

Wyświetl plik

@ -9,6 +9,7 @@ CHATGPT_TEMPERATURE=1.0
WHISPER_TO_CHAT=1
# Use Google TTS for text to speech
ENABLE_GOOGLE_TTS=0
VOICE_LANGUAGE=en # en, es, fr, de, it, pt, ru, ja, ko
BOT_TOKEN=your-telegram-bot-token
BOT_ALLOWED_USERS= XXXX,YYYY # Comma separated list of Telegram user IDs

33
main.py
Wyświetl plik

@ -41,6 +41,7 @@ TEMPERATURE = os.environ.get("CHATGPT_TEMPERATURE")
MODEL = os.environ.get("OPENAI_MODEL")
WHISPER_TO_CHAT = bool(int(os.environ.get("WHISPER_TO_CHAT")))
ENABLE_GOOGLE_TTS = bool(int(os.environ.get("ENABLE_GOOGLE_TTS")))
VOICE_LANGUAGE = os.environ.get("VOICE_LANGUAGE")
MAX_USER_CONTEXT = int(os.environ.get("CHATGPT_MAX_USER_CONTEXT"))
openai.api_key = os.environ.get("OPENAI_API_KEY")
@ -82,6 +83,12 @@ def generate_settings_markup(chat_id: str) -> InlineKeyboardMarkup:
]
return InlineKeyboardMarkup(inline_keyboard=keyboard)
def change_voice(engine, gender='male'):
    """Select the first engine voice matching VOICE_LANGUAGE and ``gender``.

    Args:
        engine: Speech engine exposing ``getProperty('voices')`` and
            ``setProperty('voice', voice_id)`` (the caller passes the object
            returned by ``pyttsx3.init()``).
        gender: Value compared for equality against each voice's ``gender``
            attribute.

    Returns:
        True if a matching voice was found and set on the engine, False
        otherwise (the engine's current voice is left untouched).
    """
    if not VOICE_LANGUAGE:
        # VOICE_LANGUAGE comes from os.environ.get() and is None when unset;
        # `None in "<str>"` would raise TypeError, so bail out early.
        return False

    for voice in engine.getProperty('voices') or ():
        languages = getattr(voice, 'languages', None)
        if not languages:
            # Voice reports no language list; indexing [0] would raise.
            continue

        # Only the first reported language is considered, as before.
        language = languages[0]
        if isinstance(language, bytes):
            # NOTE(review): some drivers appear to report bytes, others str;
            # handle both instead of decoding unconditionally.
            language = language.decode('utf-8', errors='replace')

        if VOICE_LANGUAGE in language and gender == voice.gender:
            engine.setProperty('voice', voice.id)
            return True

    return False
async def text_to_voice(text: str) -> BytesIO:
with tempfile.NamedTemporaryFile(mode='wb', suffix='.ogg', delete=False) as ogg_file:
temp_filename = ogg_file.name
@ -90,7 +97,7 @@ async def text_to_voice(text: str) -> BytesIO:
# If Google TTS is enabled, try to use it first
if ENABLE_GOOGLE_TTS:
try:
tts = gTTS(text)
tts = gTTS(text, lang=VOICE_LANGUAGE)
tts.save(temp_filename)
voice_done = True
except Exception as e:
@ -99,24 +106,16 @@ async def text_to_voice(text: str) -> BytesIO:
# If Google TTS is disabled or failed, use pyttsx3
if not voice_done:
engine = pyttsx3.init()
engine.setProperty('rate', 140)
change_voice(engine)
engine.setProperty('rate', 160)
engine.save_to_file(text, temp_filename)
engine.runAndWait()
await asyncio.sleep(0.5) # Add a small delay before reading the file
engine.stop()
# Add a small delay before reading the file
await asyncio.sleep(1)
with open(temp_filename, "rb") as audio_file:
try: # Try to read the file
voice_data = BytesIO(audio_file.read())
except Exception as e:
print(e)
# If reading the file fails, wait 0.6 seconds and try again
await asyncio.sleep(0.6) # Add a small delay before reading the file
try:
voice_data = BytesIO(audio_file.read())
except Exception:
# If reading the file fails again, return None
os.remove(temp_filename)
return
voice_data = BytesIO(audio_file.read())
os.remove(temp_filename)
voice_data.seek(0)
@ -359,7 +358,8 @@ if __name__ == '__main__':
try:
ALLOWED_USERS = os.environ.get("BOT_ALLOWED_USERS").split(",")
except (Exception):
except Exception as e:
print(e)
ALLOWED_USERS = ALLOWED_USERS
print(f"Allowed users: {ALLOWED_USERS}")
@ -369,4 +369,5 @@ if __name__ == '__main__':
# Register message handler and callback query handler for settings
dp.register_message_handler(settings, commands=['settings'])
dp.register_callback_query_handler(settings_callback, lambda c: c.data.startswith('setting_'))
executor.start_polling(dp, skip_updates=True)