From 96f4d79d30506998fb8ef99b9e16faf19fdcf745 Mon Sep 17 00:00:00 2001 From: J-Rios Date: Sat, 13 Apr 2019 12:26:45 +0200 Subject: [PATCH] Detect and remove Spam from non text messages too (resend messages, embedded links in text, images, audios, files, etc.). --- .gitignore | 1 + sources/constants.py | 51 ++++++++++++++++++++----------------- sources/join_captcha_bot.py | 27 ++++++++++++++++---- 3 files changed, 51 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 2095cc3..a2bb96a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ sources/.vscode/ sources/__pycache__/ sources/data/ +data/ *.pyc *.log test.py diff --git a/sources/constants.py b/sources/constants.py index 9e2fe50..540bf92 100644 --- a/sources/constants.py +++ b/sources/constants.py @@ -10,14 +10,15 @@ Author: Creation date: 09/09/2018 Last modified date: - 12/04/2019 + 13/04/2019 Version: - 1.2.2 + 1.2.3 ''' #################################################################################################### ### Constants ### + CONST = { 'TOKEN' : 'XXXXXXXXX:XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX', # Bot Token (get it from @BotFather) 'DATA_DIR' : './data', # Data directory path @@ -30,26 +31,28 @@ CONST = { 'INIT_ENABLE' : True, # Initial enable/disable status at Bot start 'INIT_CAPTCHA_TIME_MIN' : 5, # Initial captcha solve time (in minutes) 'T_DEL_MSG' : 5, # Default time (in mins) to remove self-destruct sent messages from the Bot - 'F_TLDS' : './tlds-alpha-by-domain.txt', # IANA TLD list (https://data.iana.org/TLD/tlds-alpha-by-domain.txt) + 'F_TLDS' : 'tlds-alpha-by-domain.txt', # IANA TLD list (https://data.iana.org/TLD/tlds-alpha-by-domain.txt) 'REGEX_URLS' : r'((?<=[^a-zA-Z0-9])*(?:https\:\/\/|[a-zA-Z0-9]{{1,}}\.{{1}}|\b)(?:\w{{1,}}\.{{1}}){{1,5}}(?:{})\b/?(?!@))', 'DEVELOPER' : '@JoseTLG', # Bot developer 'REPOSITORY' : 'https://github.com/J-Rios/TLG_JoinCaptchaBot', # Bot code repository 'DEV_PAYPAL' : 'https://www.paypal.me/josrios', # Developer Paypal address 'DEV_BTC' : '3N9wf3FunR6YNXonquBeWammaBZVzTXTyR', # Developer Bitcoin address - 'VERSION' : '1.2.2 (12/04/2019)' # Bot version + 'VERSION' : '1.2.3 (13/04/2019)' # Bot version } TEXT = { 'EN' : { 'START' : \ - 'Hello, I am a Bot that send an image captcha for each new user who join a group, and ' \ - 'kick anyone that can\'t solve the captcha in a specified time. If one user try to ' \ - 'join the group for 3 times and never solve the captcha, I will assume that this ' \ - '"user" is a Bot, and It will be ban. Also, any message that contains an URL sent ' \ - 'by a new "user" before captcha completion, will be considered Spam and will be ' \ - 'deleted.\n' \ + 'Hello, I am a Bot that send an image captcha for each new user who join a group, ' \ + 'and kick anyone that can\'t solve the captcha in a specified time.\n' \ '\n' \ - 'Remember to give me administration privileges to kick-ban users and remove messages.' \ + 'If one user try to join the group for 3 times and never solve the captcha, I will ' \ + 'assume that this "user" is a Bot, and It will be ban. Also, any message that ' \ + 'contains an URL sent by a new "user" before captcha completion, will be considered ' \ + 'Spam and will be deleted.\n' \ + '\n' \ + 'Remember to give me administration privileges to kick-ban users and remove ' \ + 'messages.\n' \ '\n' \ 'Check /help command for more information about my usage.', @@ -62,8 +65,8 @@ TEXT = { '- If one user try to join the group for 3 times and never can\'t solve the captcha, ' \ 'I will assume that the "user" is a Bot, and it will be ban.\n' \ '\n' \ - '- Any message that contains an URL that has been sent by a new "user" before captcha ' \ - 'completion, will be considered Spam and will be deleted.\n' \ + '- Any message that contains an URL that has been sent by a new "user" before ' \ + 'captcha completion, will be considered Spam and will be deleted.\n' \ '\n' \ '- You need to provide me Administration rights for kick users and remove messages.\n' \ '\n' \ @@ -241,11 +244,12 @@ TEXT = { 'ES' : { 'START' : \ 'Hola, soy un Bot que envia una imagen captcha a cada nuevo usuario que se une al ' \ - 'grupo, y kickeo a los que no resuelvan el captcha en un tiempo determinado. Si un ' \ - 'usuario ha intentado unirse al grupo 3 veces y nunca resolvió el captcha, supondré ' \ - 'que ese "usuario" es un Bot y lo banearé. Además, cualquier mensaje que contenga ' \ - 'una URL y haya sido enviado por un nuevo "usuario" antes de que este haya resuelto ' \ - 'el captcha, será considerado un mensaje de Spam y será borrado.\n' \ + 'grupo, y kickeo a los que no resuelvan el captcha en un tiempo determinado.\n' \ + '\n' \ + 'Si un usuario ha intentado unirse al grupo 3 veces y nunca resolvió el captcha, ' \ + 'supondré que ese "usuario" es un Bot y lo banearé. Además, cualquier mensaje que ' \ + 'contenga una URL y haya sido enviado por un nuevo "usuario" antes de que este haya ' \ + 'resuelto el captcha, será considerado un mensaje de Spam y será borrado.\n' \ '\n' \ 'Recuerda que para funcionar de forma adecuada debes darme permisos de ' \ 'administración para suspender usuarios y eliminar mensajes del grupo.\n' \ @@ -449,11 +453,12 @@ TEXT = { 'PT_BR' : { 'START' : \ 'Olá, eu sou um Bot que envia um captcha de imagem para cada novo usuário que entra ' \ - 'no grupo e expulsa aquele que não enviar o captcha no tempo definido. Se um usuário ' \ - 'tentar entrar no grupo 3 vezes sem enviar o captcha corretamente, vou assumir que ' \ - 'esse "usuário" é um Bot, e ele será banido. Também, qualquer mensagem que contenha ' \ - 'um URL que tenha sido enviado por um novo "usuário" antes da conclusão do captcha, ' \ - 'será considerada Spam e será excluída.\n' \ + 'no grupo e expulsa aquele que não enviar o captcha no tempo definido.\n' \ + '\n' \ + 'Se um usuário tentar entrar no grupo 3 vezes sem enviar o captcha corretamente, vou ' \ + 'assumir que esse "usuário" é um Bot, e ele será banido. Também, qualquer mensagem ' \ + 'que contenha um URL que tenha sido enviado por um novo "usuário" antes da conclusão ' \ + 'do captcha, será considerada Spam e será excluída.\n' \ '\n' \ 'Lembre-se de dar privilégios de administrador para que eu possa expulsar-banir ' \ 'usuários e excluir mensagens do grupo.\n' \ diff --git a/sources/join_captcha_bot.py b/sources/join_captcha_bot.py index 2a0768e..aec4570 100644 --- a/sources/join_captcha_bot.py +++ b/sources/join_captcha_bot.py @@ -13,9 +13,9 @@ Author: Creation date: 09/09/2018 Last modified date: - 12/04/2019 + 13/04/2019 Version: - 1.2.2 + 1.2.3 ''' #################################################################################################### @@ -97,7 +97,8 @@ def initialize_resources(): for key, value in default_conf.items(): save_config_property(f_chat_id, key, value) # Load and generate URL detector regex from TLD list file - load_urls_regex(CONST["F_TLDS"]) + actual_script_path = path.dirname(path.realpath(__file__)) + load_urls_regex("{}/{}".format(actual_script_path, CONST["F_TLDS"])) def load_urls_regex(file_path): @@ -579,8 +580,22 @@ def msg_nocmd(bot, update): chat_id = update.message.chat_id chat_type = update.message.chat.type user_id = update.message.from_user.id - msg_text = update.message.text msg_id = update.message.message_id + msg_text = update.message.text + # Check if message has a text link (embedded url in text) and get it + msg_entities = getattr(update.message, "entities", None) + if msg_entities is not None: + for entity in msg_entities: + url = getattr(entity, "url", None) + if url is not None: + if url != "": + msg_text = "{} [{}]".format(msg_text, url) + break + # If message doesnt has text, check for caption fields (for no text msgs and resended ones) + if msg_text is None: + msg_text = getattr(update.message, "caption_html", None) + if msg_text is None: + msg_text = getattr(update.message, "caption", None) # Verify if we are in a group if chat_type != "private": # Get and update chat data @@ -1079,7 +1094,9 @@ def main(): updater = Updater(CONST["TOKEN"]) dp = updater.dispatcher # Set to dispatcher a not-command text messages handler - dp.add_handler(MessageHandler(Filters.text, msg_nocmd)) + dp.add_handler(MessageHandler(Filters.text | Filters.photo | Filters.audio | Filters.voice | \ + Filters.video | Filters.sticker | Filters.document | Filters.location | Filters.contact, \ + msg_nocmd)) # Set to dispatcher a new member join the group and member left the group events handlers dp.add_handler(MessageHandler(Filters.status_update.new_chat_members, msg_new_user)) # Set to dispatcher request new captcha button callback handler