From 6f7d8d4c09f85873009598115abd72468f2dc4ce Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 17 Aug 2024 21:58:34 +0000 Subject: [PATCH] Remove unused contacts --- src/Protocol/ActivityPub/Transmitter.php | 2 +- src/Worker/RemoveUnusedContacts.php | 79 ++++++++++++++---------- static/defaults.config.php | 6 ++ 3 files changed, 55 insertions(+), 32 deletions(-) diff --git a/src/Protocol/ActivityPub/Transmitter.php b/src/Protocol/ActivityPub/Transmitter.php index 20ff0e6f3b..2792eab2d6 100644 --- a/src/Protocol/ActivityPub/Transmitter.php +++ b/src/Protocol/ActivityPub/Transmitter.php @@ -928,7 +928,7 @@ class Transmitter * * @param array $item * @param boolean $blindcopy - * @return void + * @return array */ public static function getReceiversForUriId(int $uri_id, bool $blindcopy): array { diff --git a/src/Worker/RemoveUnusedContacts.php b/src/Worker/RemoveUnusedContacts.php index f1a746eb5c..6f9d2124e1 100644 --- a/src/Worker/RemoveUnusedContacts.php +++ b/src/Worker/RemoveUnusedContacts.php @@ -24,8 +24,10 @@ namespace Friendica\Worker; use Friendica\Contact\Avatar; use Friendica\Core\Logger; use Friendica\Core\Protocol; +use Friendica\Core\Worker; use Friendica\Database\DBA; use Friendica\Database\DBStructure; +use Friendica\DI; use Friendica\Model\Contact; use Friendica\Model\Photo; use Friendica\Util\DateTimeFormat; @@ -37,28 +39,53 @@ class RemoveUnusedContacts { public static function execute() { - $condition = ["`id` != ? AND `uid` = ? AND NOT `self` AND NOT `nurl` IN (SELECT `nurl` FROM `contact` WHERE `uid` != ?) - AND (NOT `network` IN (?, ?, ?, ?, ?, ?) OR (`archive` AND `success_update` < ?)) - AND NOT `id` IN (SELECT `author-id` FROM `post-user` WHERE `author-id` = `contact`.`id`) - AND NOT `id` IN (SELECT `owner-id` FROM `post-user` WHERE `owner-id` = `contact`.`id`) - AND NOT `id` IN (SELECT `causer-id` FROM `post-user` WHERE `causer-id` IS NOT NULL AND `causer-id` = `contact`.`id`) - AND NOT `id` IN (SELECT `cid` FROM `post-tag` WHERE `cid` = `contact`.`id`) - AND NOT `id` IN (SELECT `contact-id` FROM `post-user` WHERE `contact-id` = `contact`.`id`) - AND NOT `id` IN (SELECT `cid` FROM `user-contact` WHERE `cid` = `contact`.`id`) - AND NOT `id` IN (SELECT `cid` FROM `event` WHERE `cid` = `contact`.`id`) - AND NOT `id` IN (SELECT `cid` FROM `group` WHERE `cid` = `contact`.`id`) - AND NOT `id` IN (SELECT `cid` FROM `delivery-queue` WHERE `cid` = `contact`.`id`) - AND NOT `id` IN (SELECT `author-id` FROM `mail` WHERE `author-id` = `contact`.`id`) - AND NOT `id` IN (SELECT `contact-id` FROM `mail` WHERE `contact-id` = `contact`.`id`) - AND NOT `id` IN (SELECT `contact-id` FROM `group_member` WHERE `contact-id` = `contact`.`id`) - AND `created` < ?", - 0, 0, 0, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, Protocol::FEED, Protocol::MAIL, Protocol::ACTIVITYPUB, DateTimeFormat::utc('now - 365 days'), DateTimeFormat::utc('now - 30 days')]; + $loop = 0; + while (self::removeContacts(++$loop)) { + Logger::info('In removal', ['loop' => $loop]); + } - $total = DBA::count('contact', $condition); - Logger::notice('Starting removal', ['total' => $total]); + Logger::notice('Remove apcontact entries with no related contact'); + DBA::delete('apcontact', ["`uri-id` NOT IN (SELECT `uri-id` FROM `contact`) AND `updated` < ?", DateTimeFormat::utc('now - 30 days')]); + Logger::notice('Removed apcontact entries with no related contact', ['count' => DBA::affectedRows()]); + + Logger::notice('Remove diaspora-contact entries with no related contact'); + DBA::delete('diaspora-contact', ["`uri-id` NOT IN (SELECT `uri-id` FROM `contact`) AND `updated` < ?", DateTimeFormat::utc('now - 30 days')]); + Logger::notice('Removed diaspora-contact entries with no related contact', ['count' => DBA::affectedRows()]); + } + + public static function removeContacts(int $loop): bool + { + Logger::notice('Starting removal', ['loop' => $loop]); + + $condition = [ + "`id` != ? AND `uid` = ? AND NOT `self` AND NOT `uri-id` IN (SELECT `uri-id` FROM `contact` WHERE `uid` != ?) + AND NOT EXISTS(SELECT `author-id` FROM `post-user` WHERE `author-id` = `contact`.`id`) + AND NOT EXISTS(SELECT `owner-id` FROM `post-user` WHERE `owner-id` = `contact`.`id`) + AND NOT EXISTS(SELECT `causer-id` FROM `post-user` WHERE `causer-id` IS NOT NULL AND `causer-id` = `contact`.`id`) + AND NOT EXISTS(SELECT `cid` FROM `post-tag` WHERE `cid` = `contact`.`id`) + AND NOT EXISTS(SELECT `contact-id` FROM `post-user` WHERE `contact-id` = `contact`.`id`) + AND NOT EXISTS(SELECT `cid` FROM `user-contact` WHERE `cid` = `contact`.`id`) + AND NOT EXISTS(SELECT `cid` FROM `event` WHERE `cid` = `contact`.`id`) + AND NOT EXISTS(SELECT `cid` FROM `group` WHERE `cid` = `contact`.`id`) + AND NOT EXISTS(SELECT `author-id` FROM `mail` WHERE `author-id` = `contact`.`id`) + AND NOT EXISTS(SELECT `contact-id` FROM `mail` WHERE `contact-id` = `contact`.`id`) + AND NOT EXISTS(SELECT `contact-id` FROM `group_member` WHERE `contact-id` = `contact`.`id`) + AND `created` < ?", 0, 0, 0, DateTimeFormat::utc('now - 7 days') + ]; + + if (!DI::config()->get('remove_all_unused_contacts')) { + $condition2 = [ + "(NOT `network` IN (?, ?, ?, ?, ?, ?) OR `archive`)", + Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, Protocol::FEED, Protocol::MAIL, Protocol::ACTIVITYPUB + ]; + + $condition = DBA::mergeConditions($condition2, $condition); + } + + $contacts = DBA::select('contact', ['id', 'uid', 'photo', 'thumb', 'micro'], $condition, ['limit' => 1000]); $count = 0; - $contacts = DBA::select('contact', ['id', 'uid', 'photo', 'thumb', 'micro'], $condition); while ($contact = DBA::fetch($contacts)) { + ++$count; Photo::delete(['uid' => $contact['uid'], 'contact-id' => $contact['id']]); Avatar::deleteCache($contact); @@ -87,19 +114,9 @@ class RemoveUnusedContacts DBA::delete('post-thread-user', ['causer-id' => $contact['id']]); Contact::deleteById($contact['id']); - if ((++$count % 1000) == 0) { - Logger::info('In removal', ['count' => $count, 'total' => $total]); - } } DBA::close($contacts); - Logger::notice('Removal done', ['count' => $count, 'total' => $total]); - - Logger::notice('Remove apcontact entries with no related contact'); - DBA::delete('apcontact', ["`uri-id` NOT IN (SELECT `uri-id` FROM `contact`) AND `updated` < ?", DateTimeFormat::utc('now - 30 days')]); - Logger::notice('Removed apcontact entries with no related contact', ['count' => DBA::affectedRows()]); - - Logger::notice('Remove diaspora-contact entries with no related contact'); - DBA::delete('diaspora-contact', ["`uri-id` NOT IN (SELECT `uri-id` FROM `contact`) AND `updated` < ?", DateTimeFormat::utc('now - 30 days')]); - Logger::notice('Removed diaspora-contact entries with no related contact', ['count' => DBA::affectedRows()]); + Logger::notice('Removal done', ['count' => $count]); + return ($count == 1000 && Worker::isInMaintenanceWindow()); } } diff --git a/static/defaults.config.php b/static/defaults.config.php index 6bdc9c53b5..49f8d6e5fd 100644 --- a/static/defaults.config.php +++ b/static/defaults.config.php @@ -514,6 +514,12 @@ return [ // Redistribute incoming activities via ActivityPub 'redistribute_activities' => true, + // remove_all_unused_contacts (Boolean) + // Remove all unused contacts. + // Per default only archived contacts are removed from federated services. + // Unused contacts from connector networks will be removed in any case. + 'remove_all_unused_contacts' => false, + // session_handler (database|cache|native) // Whether to use Cache to store session data or to use PHP native session storage. 'session_handler' => 'database',