Detect and remove Spam from non-text messages too (resent/forwarded messages, links embedded in text, images, audio, files, etc.).

pull/2/head
J-Rios 2019-04-13 12:26:45 +02:00
rodzic 7bfb395a1c
commit 96f4d79d30
3 zmienionych plików z 51 dodań i 28 usunięć

1
.gitignore vendored
Wyświetl plik

@ -1,6 +1,7 @@
sources/.vscode/
sources/__pycache__/
sources/data/
data/
*.pyc
*.log
test.py

Wyświetl plik

@ -10,14 +10,15 @@ Author:
Creation date:
09/09/2018
Last modified date:
12/04/2019
13/04/2019
Version:
1.2.2
1.2.3
'''
####################################################################################################
### Constants ###
CONST = {
'TOKEN' : 'XXXXXXXXX:XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX', # Bot Token (get it from @BotFather)
'DATA_DIR' : './data', # Data directory path
@ -30,26 +31,28 @@ CONST = {
'INIT_ENABLE' : True, # Initial enable/disable status at Bot start
'INIT_CAPTCHA_TIME_MIN' : 5, # Initial captcha solve time (in minutes)
'T_DEL_MSG' : 5, # Default time (in mins) to remove self-destruct sent messages from the Bot
'F_TLDS' : './tlds-alpha-by-domain.txt', # IANA TLD list (https://data.iana.org/TLD/tlds-alpha-by-domain.txt)
'F_TLDS' : 'tlds-alpha-by-domain.txt', # IANA TLD list (https://data.iana.org/TLD/tlds-alpha-by-domain.txt)
'REGEX_URLS' : r'((?<=[^a-zA-Z0-9])*(?:https\:\/\/|[a-zA-Z0-9]{{1,}}\.{{1}}|\b)(?:\w{{1,}}\.{{1}}){{1,5}}(?:{})\b/?(?!@))',
'DEVELOPER' : '@JoseTLG', # Bot developer
'REPOSITORY' : 'https://github.com/J-Rios/TLG_JoinCaptchaBot', # Bot code repository
'DEV_PAYPAL' : 'https://www.paypal.me/josrios', # Developer Paypal address
'DEV_BTC' : '3N9wf3FunR6YNXonquBeWammaBZVzTXTyR', # Developer Bitcoin address
'VERSION' : '1.2.2 (12/04/2019)' # Bot version
'VERSION' : '1.2.3 (13/04/2019)' # Bot version
}
TEXT = {
'EN' : {
'START' : \
'Hello, I am a Bot that send an image captcha for each new user who join a group, and ' \
'kick anyone that can\'t solve the captcha in a specified time. If one user try to ' \
'join the group for 3 times and never solve the captcha, I will assume that this ' \
'"user" is a Bot, and It will be ban. Also, any message that contains an URL sent ' \
'by a new "user" before captcha completion, will be considered Spam and will be ' \
'deleted.\n' \
'Hello, I am a Bot that send an image captcha for each new user who join a group, ' \
'and kick anyone that can\'t solve the captcha in a specified time.\n' \
'\n' \
'Remember to give me administration privileges to kick-ban users and remove messages.' \
'If one user try to join the group for 3 times and never solve the captcha, I will ' \
'assume that this "user" is a Bot, and It will be ban. Also, any message that ' \
'contains an URL sent by a new "user" before captcha completion, will be considered ' \
'Spam and will be deleted.\n' \
'\n' \
'Remember to give me administration privileges to kick-ban users and remove ' \
'messages.\n' \
'\n' \
'Check /help command for more information about my usage.',
@ -62,8 +65,8 @@ TEXT = {
'- If one user try to join the group for 3 times and never can\'t solve the captcha, ' \
'I will assume that the "user" is a Bot, and it will be ban.\n' \
'\n' \
'- Any message that contains an URL that has been sent by a new "user" before captcha ' \
'completion, will be considered Spam and will be deleted.\n' \
'- Any message that contains an URL that has been sent by a new "user" before ' \
'captcha completion, will be considered Spam and will be deleted.\n' \
'\n' \
'- You need to provide me Administration rights for kick users and remove messages.\n' \
'\n' \
@ -241,11 +244,12 @@ TEXT = {
'ES' : {
'START' : \
'Hola, soy un Bot que envia una imagen captcha a cada nuevo usuario que se une al ' \
'grupo, y kickeo a los que no resuelvan el captcha en un tiempo determinado. Si un ' \
'usuario ha intentado unirse al grupo 3 veces y nunca resolvió el captcha, supondré ' \
'que ese "usuario" es un Bot y lo banearé. Además, cualquier mensaje que contenga ' \
'una URL y haya sido enviado por un nuevo "usuario" antes de que este haya resuelto ' \
'el captcha, será considerado un mensaje de Spam y será borrado.\n' \
'grupo, y kickeo a los que no resuelvan el captcha en un tiempo determinado.\n' \
'\n' \
'Si un usuario ha intentado unirse al grupo 3 veces y nunca resolvió el captcha, ' \
'supondré que ese "usuario" es un Bot y lo banearé. Además, cualquier mensaje que ' \
'contenga una URL y haya sido enviado por un nuevo "usuario" antes de que este haya ' \
'resuelto el captcha, será considerado un mensaje de Spam y será borrado.\n' \
'\n' \
'Recuerda que para funcionar de forma adecuada debes darme permisos de ' \
'administración para suspender usuarios y eliminar mensajes del grupo.\n' \
@ -449,11 +453,12 @@ TEXT = {
'PT_BR' : {
'START' : \
'Olá, eu sou um Bot que envia um captcha de imagem para cada novo usuário que entra ' \
'no grupo e expulsa aquele que não enviar o captcha no tempo definido. Se um usuário ' \
'tentar entrar no grupo 3 vezes sem enviar o captcha corretamente, vou assumir que ' \
'esse "usuário" é um Bot, e ele será banido. Também, qualquer mensagem que contenha ' \
'um URL que tenha sido enviado por um novo "usuário" antes da conclusão do captcha, ' \
'será considerada Spam e será excluída.\n' \
'no grupo e expulsa aquele que não enviar o captcha no tempo definido.\n' \
'\n' \
'Se um usuário tentar entrar no grupo 3 vezes sem enviar o captcha corretamente, vou ' \
'assumir que esse "usuário" é um Bot, e ele será banido. Também, qualquer mensagem ' \
'que contenha um URL que tenha sido enviado por um novo "usuário" antes da conclusão ' \
'do captcha, será considerada Spam e será excluída.\n' \
'\n' \
'Lembre-se de dar privilégios de administrador para que eu possa expulsar-banir ' \
'usuários e excluir mensagens do grupo.\n' \

Wyświetl plik

@ -13,9 +13,9 @@ Author:
Creation date:
09/09/2018
Last modified date:
12/04/2019
13/04/2019
Version:
1.2.2
1.2.3
'''
####################################################################################################
@ -97,7 +97,8 @@ def initialize_resources():
for key, value in default_conf.items():
save_config_property(f_chat_id, key, value)
# Load and generate URL detector regex from TLD list file
load_urls_regex(CONST["F_TLDS"])
actual_script_path = path.dirname(path.realpath(__file__))
load_urls_regex("{}/{}".format(actual_script_path, CONST["F_TLDS"]))
def load_urls_regex(file_path):
@ -579,8 +580,22 @@ def msg_nocmd(bot, update):
chat_id = update.message.chat_id
chat_type = update.message.chat.type
user_id = update.message.from_user.id
msg_text = update.message.text
msg_id = update.message.message_id
msg_text = update.message.text
# Check whether the message has a text link (a URL embedded in the text) and, if so, append it to the text
msg_entities = getattr(update.message, "entities", None)
if msg_entities is not None:
for entity in msg_entities:
url = getattr(entity, "url", None)
if url is not None:
if url != "":
msg_text = "{} [{}]".format(msg_text, url)
break
# If the message has no text, fall back to its caption fields (for non-text and resent/forwarded messages)
if msg_text is None:
msg_text = getattr(update.message, "caption_html", None)
if msg_text is None:
msg_text = getattr(update.message, "caption", None)
# Verify if we are in a group
if chat_type != "private":
# Get and update chat data
@ -1079,7 +1094,9 @@ def main():
updater = Updater(CONST["TOKEN"])
dp = updater.dispatcher
# Register the dispatcher handler for non-command messages (text and the other filtered message types)
dp.add_handler(MessageHandler(Filters.text, msg_nocmd))
dp.add_handler(MessageHandler(Filters.text | Filters.photo | Filters.audio | Filters.voice | \
Filters.video | Filters.sticker | Filters.document | Filters.location | Filters.contact, \
msg_nocmd))
# Set to dispatcher a new member join the group and member left the group events handlers
dp.add_handler(MessageHandler(Filters.status_update.new_chat_members, msg_new_user))
# Set to dispatcher request new captcha button callback handler