From edbdfbae6b78086c220c0ad116c0fa7deb03a6e8 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 3 Dec 2020 15:47:50 +0000 Subject: [PATCH 1/5] workerqueue now has a "command" field --- src/Core/Worker.php | 51 ++++++++++++++++------ src/Model/GServer.php | 68 ++++++++++++++++++++++++----- src/Worker/Cron.php | 5 ++- src/Worker/UpdateGServer.php | 11 ++--- src/Worker/UpdateGServers.php | 32 +++++++------- src/Worker/UpdatePublicContacts.php | 21 ++++++--- static/dbstructure.config.php | 12 +++-- 7 files changed, 145 insertions(+), 55 deletions(-) diff --git a/src/Core/Worker.php b/src/Core/Worker.php index 33e66ae1c..4f4552776 100644 --- a/src/Core/Worker.php +++ b/src/Core/Worker.php @@ -300,7 +300,11 @@ class Worker return false; } - $argv = json_decode($queue["parameter"], true); + $argv = json_decode($queue['parameter'], true); + if (!empty($queue['command'])) { + array_unshift($argv, $queue['command']); + } + if (empty($argv)) { Logger::warning('Parameter is empty', ['queue' => $queue]); return false; @@ -576,7 +580,7 @@ class Worker $stamp = (float)microtime(true); $entries = DBA::select( 'workerqueue', - ['id', 'pid', 'executed', 'priority', 'parameter'], + ['id', 'pid', 'executed', 'priority', 'command', 'parameter'], ['NOT `done` AND `pid` != 0'], ['order' => ['priority', 'retrial', 'created']] ); @@ -603,17 +607,21 @@ class Worker $max_duration_defaults = [PRIORITY_CRITICAL => 720, PRIORITY_HIGH => 10, PRIORITY_MEDIUM => 60, PRIORITY_LOW => 180, PRIORITY_NEGLIGIBLE => 720]; $max_duration = $max_duration_defaults[$entry["priority"]]; - $argv = json_decode($entry["parameter"], true); - if (empty($argv)) { + $argv = json_decode($entry['parameter'], true); + if (!empty($entry['command'])) { + $command = $entry['command']; + } elseif (!empty($argv)) { + $command = array_shift($argv); + } else { return; } - $argv[0] = basename($argv[0]); + $command = basename($command); // How long is the process already running? 
$duration = (time() - strtotime($entry["executed"])) / 60; if ($duration > $max_duration) { - Logger::notice("Worker process ".$entry["pid"]." (".substr(json_encode($argv), 0, 50).") took more than ".$max_duration." minutes. It will be killed now."); + Logger::notice('Worker process took too much time - killed', ['duration' => number_format($duration, 3), 'max' => $max_duration, 'id' => $entry["id"], 'pid' => $entry["pid"], 'command' => $command]); posix_kill($entry["pid"], SIGTERM); // We killed the stale process. @@ -636,7 +644,7 @@ class Worker self::$db_duration += (microtime(true) - $stamp); self::$db_duration_write += (microtime(true) - $stamp); } else { - Logger::info('Process runtime is okay', ['pid' => $entry["pid"], 'duration' => $duration, 'max' => $max_duration, 'command' => substr(json_encode($argv), 0, 50)]); + Logger::info('Process runtime is okay', ['duration' => number_format($duration, 3), 'max' => $max_duration, 'id' => $entry["id"], 'pid' => $entry["pid"], 'command' => $command]); } } } @@ -848,12 +856,17 @@ class Worker $ids = []; $stamp = (float)microtime(true); $condition = ["`priority` = ? 
AND `pid` = 0 AND NOT `done` AND `next_try` < ?", $priority, DateTimeFormat::utcNow()]; - $tasks = DBA::select('workerqueue', ['id', 'parameter'], $condition, ['limit' => $limit, 'order' => ['retrial', 'created']]); + $tasks = DBA::select('workerqueue', ['id', 'command', 'parameter'], $condition, ['limit' => $limit, 'order' => ['retrial', 'created']]); self::$db_duration += (microtime(true) - $stamp); while ($task = DBA::fetch($tasks)) { $ids[] = $task['id']; // Only continue that loop while we are storing commands that can be processed quickly - $command = json_decode($task['parameter'])[0]; + if (!empty($task['command'])) { + $command = $task['command']; + } else { + $command = json_decode($task['parameter'])[0]; + } + if (!in_array($command, self::FAST_COMMANDS)) { break; } @@ -968,13 +981,17 @@ class Worker if ($limit > 0) { $stamp = (float)microtime(true); $condition = ["`pid` = 0 AND NOT `done` AND `next_try` < ?", DateTimeFormat::utcNow()]; - $tasks = DBA::select('workerqueue', ['id', 'parameter'], $condition, ['limit' => $limit, 'order' => ['priority', 'retrial', 'created']]); + $tasks = DBA::select('workerqueue', ['id', 'command', 'parameter'], $condition, ['limit' => $limit, 'order' => ['priority', 'retrial', 'created']]); self::$db_duration += (microtime(true) - $stamp); while ($task = DBA::fetch($tasks)) { $ids[] = $task['id']; // Only continue that loop while we are storing commands that can be processed quickly - $command = json_decode($task['parameter'])[0]; + if (!empty($task['command'])) { + $command = $task['command']; + } else { + $command = json_decode($task['parameter'])[0]; + } if (!in_array($command, self::FAST_COMMANDS)) { break; } @@ -1242,8 +1259,9 @@ class Worker } } + $command = array_shift($args); $parameters = json_encode($args); - $found = DBA::exists('workerqueue', ['parameter' => $parameters, 'done' => false]); + $found = DBA::exists('workerqueue', ['command' => $command, 'parameter' => $parameters, 'done' => false]); $added = 
false; // Quit if there was a database error - a precaution for the update process to 3.5.3 @@ -1252,13 +1270,13 @@ class Worker } if (!$found) { - $added = DBA::insert('workerqueue', ['parameter' => $parameters, 'created' => $created, + $added = DBA::insert('workerqueue', ['command' => $command, 'parameter' => $parameters, 'created' => $created, 'priority' => $priority, 'next_try' => $delayed]); if (!$added) { return false; } } elseif ($force_priority) { - DBA::update('workerqueue', ['priority' => $priority], ['parameter' => $parameters, 'done' => false, 'pid' => 0]); + DBA::update('workerqueue', ['priority' => $priority], ['command' => $command, 'parameter' => $parameters, 'done' => false, 'pid' => 0]); } // Set the IPC flag to ensure an immediate process execution via daemon @@ -1297,6 +1315,11 @@ class Worker return $added; } + public static function countWorkersByCommand(string $command) + { + return DBA::count('workerqueue', ['done' => false, 'pid' => 0, 'command' => $command]); + } + /** * Returns the next retrial level for worker jobs. * This function will skip levels when jobs are older. 
diff --git a/src/Model/GServer.php b/src/Model/GServer.php index 713d6f114..c035abc73 100644 --- a/src/Model/GServer.php +++ b/src/Model/GServer.php @@ -124,6 +124,55 @@ class GServer return self::check($server, $network, $force); } + public static function getNextUpdateDate(bool $success, string $created = '', string $last_contact = '') + { + // On successful contact process check again next week + if ($success) { + return DateTimeFormat::utc('now +7 day'); + } + + $now = strtotime(DateTimeFormat::utcNow()); + + if ($created > $last_contact) { + $contact_time = strtotime($created); + } else { + $contact_time = strtotime($last_contact); + } + + // If the last contact was less than 6 hours before then try again in 6 hours + if (($now - $contact_time) < (60 * 60 * 6)) { + return DateTimeFormat::utc('now +6 hour'); + } + + // If the last contact was less than 12 hours before then try again in 12 hours + if (($now - $contact_time) < (60 * 60 * 12)) { + return DateTimeFormat::utc('now +12 hour'); + } + + // If the last contact was less than 24 hours before then try tomorrow again + if (($now - $contact_time) < (60 * 60 * 24)) { + return DateTimeFormat::utc('now +1 day'); + } + + // If the last contact was less than a week before then try again in a week + if (($now - $contact_time) < (60 * 60 * 24 * 7)) { + return DateTimeFormat::utc('now +1 week'); + } + + // If the last contact was less than two weeks before then try again in two week + if (($now - $contact_time) < (60 * 60 * 24 * 14)) { + return DateTimeFormat::utc('now +2 week'); + } + + // If the last contact was less than a month before then try again in a month + if (($now - $contact_time) < (60 * 60 * 24 * 30)) { + return DateTimeFormat::utc('now +1 month'); + } + + // The system hadn't been successul contacted for more than a month, so try again in three months + return DateTimeFormat::utc('now +3 month'); + } + /** * Decides if a server needs to be updated, based upon several date fields * @@ -235,10 +284,13 
@@ class GServer * * @param string $url */ - private static function setFailure(string $url) + public static function setFailure(string $url) { - if (DBA::exists('gserver', ['nurl' => Strings::normaliseLink($url)])) { - DBA::update('gserver', ['failed' => true, 'last_failure' => DateTimeFormat::utcNow(), 'detection-method' => null], + $gserver = DBA::selectFirst('gserver', [], ['nurl' => Strings::normaliseLink($url)]); + if (DBA::isResult($gserver)) { + $next_update = self::getNextUpdateDate(false, $gserver['created'], $gserver['last_contact']); + DBA::update('gserver', ['failed' => true, 'last_failure' => DateTimeFormat::utcNow(), + 'next_contact' => $next_update, 'detection-method' => null], ['nurl' => Strings::normaliseLink($url)]); Logger::info('Set failed status for existing server', ['url' => $url]); return; @@ -306,6 +358,7 @@ class GServer // If the URL missmatches, then we mark the old entry as failure if ($url != $original_url) { + /// @todo What to do with "next_contact" here? DBA::update('gserver', ['failed' => true, 'last_failure' => DateTimeFormat::utcNow()], ['nurl' => Strings::normaliseLink($original_url)]); } @@ -452,6 +505,8 @@ class GServer $serverdata = self::detectNetworkViaContacts($url, $serverdata); } + $serverdata['next_contact'] = self::getNextUpdateDate(true); + $serverdata['last_contact'] = DateTimeFormat::utcNow(); $serverdata['failed'] = false; @@ -1593,13 +1648,6 @@ class GServer ); while ($gserver = DBA::fetch($gservers)) { - if (!GServer::check($gserver['url'], $gserver['network'])) { - // The server is not reachable? 
Okay, then we will try it later - $fields = ['last_poco_query' => DateTimeFormat::utcNow()]; - DBA::update('gserver', $fields, ['nurl' => $gserver['nurl']]); - continue; - } - Logger::info('Update peer list', ['server' => $gserver['url'], 'id' => $gserver['id']]); Worker::add(PRIORITY_LOW, 'UpdateServerPeers', $gserver['url']); diff --git a/src/Worker/Cron.php b/src/Worker/Cron.php index 270387d35..c47e4bd20 100644 --- a/src/Worker/Cron.php +++ b/src/Worker/Cron.php @@ -63,6 +63,9 @@ class Cron // Update contact information Worker::add(PRIORITY_LOW, 'UpdatePublicContacts'); + // Update server information + Worker::add(PRIORITY_LOW, 'UpdateGServers'); + // run the process to update server directories in the background Worker::add(PRIORITY_LOW, 'UpdateServerDirectories'); @@ -103,8 +106,6 @@ class Cron // update nodeinfo data Worker::add(PRIORITY_LOW, 'NodeInfo'); - Worker::add(PRIORITY_LOW, 'UpdateGServers'); - // Repair entries in the database Worker::add(PRIORITY_LOW, 'RepairDatabase'); diff --git a/src/Worker/UpdateGServer.php b/src/Worker/UpdateGServer.php index 12f9572b9..696ec125d 100644 --- a/src/Worker/UpdateGServer.php +++ b/src/Worker/UpdateGServer.php @@ -32,18 +32,19 @@ class UpdateGServer * @param string $server_url Server URL * @param boolean $only_nodeinfo Only use nodeinfo for server detection */ - public static function execute(string $server_url, bool $only_nodeinfo = false) + public static function execute(string $server_url, bool $only_nodeinfo = false, bool $force = false) { if (empty($server_url)) { return; } - $server_url = filter_var($server_url, FILTER_SANITIZE_URL); - if (substr(Strings::normaliseLink($server_url), 0, 7) != 'http://') { + $filtered = filter_var($server_url, FILTER_SANITIZE_URL); + if (substr(Strings::normaliseLink($filtered), 0, 7) != 'http://') { + GServer::setFailure($filtered); return; } - $ret = GServer::check($server_url, '', false, $only_nodeinfo); - Logger::info('Updated gserver', ['url' => $server_url, 'result' => 
$ret]); + $ret = GServer::check($filtered, '', $force, $only_nodeinfo); + Logger::info('Updated gserver', ['url' => $filtered, 'result' => $ret]); } } diff --git a/src/Worker/UpdateGServers.php b/src/Worker/UpdateGServers.php index 5a4513846..12022a346 100644 --- a/src/Worker/UpdateGServers.php +++ b/src/Worker/UpdateGServers.php @@ -24,34 +24,36 @@ namespace Friendica\Worker; use Friendica\Core\Logger; use Friendica\Core\Worker; use Friendica\Database\DBA; -use Friendica\Model\GServer; class UpdateGServers { /** - * Updates the first 250 servers + * Updates up to 100 servers */ public static function execute() { - $gservers = DBA::p("SELECT `url`, `created`, `last_failure`, `last_contact` FROM `gserver` ORDER BY rand()"); + $updating = Worker::countWorkersByCommand('UpdateGServer'); + $limit = 100 - $updating; + if ($limit <= 0) { + Logger::info('The number of currently running jobs exceed the limit'); + return; + } + + $outdated = DBA::count('gserver', ["`next_contact` < UTC_TIMESTAMP()"]); + $total = DBA::count('gserver'); + Logger::info('Server status', ['total' => $total, 'outdated' => $outdated, 'updating' => $limit]); + + $gservers = DBA::select('gserver', ['url'], ["`next_contact` < UTC_TIMESTAMP()"], ['limit' => $limit]); if (!DBA::isResult($gservers)) { return; } - $updated = 0; - + $count = 0; while ($gserver = DBA::fetch($gservers)) { - if (!GServer::updateNeeded($gserver['created'], '', $gserver['last_failure'], $gserver['last_contact'])) { - continue; - } - Logger::info('Update server status', ['server' => $gserver['url']]); - - Worker::add(PRIORITY_LOW, 'UpdateGServer', $gserver['url']); - - if (++$updated > 250) { - return; - } + Worker::add(PRIORITY_LOW, 'UpdateGServer', $gserver['url'], false, true); + $count++; } DBA::close($gservers); + Logger::info('Updated servers', ['count' => $count]); } } diff --git a/src/Worker/UpdatePublicContacts.php b/src/Worker/UpdatePublicContacts.php index 2e8602b05..939d9fa8d 100644 --- 
a/src/Worker/UpdatePublicContacts.php +++ b/src/Worker/UpdatePublicContacts.php @@ -39,16 +39,27 @@ class UpdatePublicContacts $ids = []; $base_condition = ['network' => Protocol::FEDERATED, 'uid' => 0, 'self' => false]; + $existing = Worker::countWorkersByCommand('UpdateContact'); + Logger::info('Already existing jobs', ['existing' => $existing]); + if ($existing > 100) { + return; + } + + $limit = 100 - $existing; + if (!DI::config()->get('system', 'update_active_contacts')) { + $part = 3; // Add every contact (mostly failed ones) that hadn't been updated for six months $condition = DBA::mergeConditions($base_condition, ["`last-update` < ?", DateTimeFormat::utc('now - 6 month')]); - $ids = self::getContactsToUpdate($condition, $ids); + $ids = self::getContactsToUpdate($condition, $ids, round($limit / $part)); // Add every non failed contact that hadn't been updated for a month $condition = DBA::mergeConditions($base_condition, ["NOT `failed` AND `last-update` < ?", DateTimeFormat::utc('now - 1 month')]); - $ids = self::getContactsToUpdate($condition, $ids); + $ids = self::getContactsToUpdate($condition, $ids, round($limit / $part)); + } else { + $part = 1; } // Add every contact our system interacted with and hadn't been updated for a week @@ -56,7 +67,7 @@ class UpdatePublicContacts `id` IN (SELECT `owner-id` FROM `item`) OR `id` IN (SELECT `causer-id` FROM `item`) OR `id` IN (SELECT `cid` FROM `post-tag`) OR `id` IN (SELECT `cid` FROM `user-contact`)) AND `last-update` < ?", DateTimeFormat::utc('now - 1 week')]); - $ids = self::getContactsToUpdate($condition, $ids); + $ids = self::getContactsToUpdate($condition, $ids, round($limit / $part)); foreach ($ids as $id) { Worker::add(PRIORITY_LOW, "UpdateContact", $id); @@ -73,9 +84,9 @@ class UpdatePublicContacts * @param array $ids * @return array contact ids */ - private static function getContactsToUpdate(array $condition, array $ids = []) + private static function getContactsToUpdate(array $condition, array $ids 
= [], int $limit) { - $contacts = DBA::select('contact', ['id'], $condition, ['limit' => 100, 'order' => ['last-update']]); + $contacts = DBA::select('contact', ['id'], $condition, ['limit' => $limit, 'order' => ['last-update']]); while ($contact = DBA::fetch($contacts)) { $ids[] = $contact['id']; } diff --git a/static/dbstructure.config.php b/static/dbstructure.config.php index 9a125cc15..c8ebd656d 100644 --- a/static/dbstructure.config.php +++ b/static/dbstructure.config.php @@ -81,13 +81,15 @@ return [ "detection-method" => ["type" => "tinyint unsigned", "comment" => "Method that had been used to detect that server"], "created" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => ""], "last_poco_query" => ["type" => "datetime", "default" => DBA::NULL_DATETIME, "comment" => ""], - "last_contact" => ["type" => "datetime", "default" => DBA::NULL_DATETIME, "comment" => ""], - "last_failure" => ["type" => "datetime", "default" => DBA::NULL_DATETIME, "comment" => ""], + "last_contact" => ["type" => "datetime", "default" => DBA::NULL_DATETIME, "comment" => "Last successful connection request"], + "last_failure" => ["type" => "datetime", "default" => DBA::NULL_DATETIME, "comment" => "Last failed connection request"], "failed" => ["type" => "boolean", "comment" => "Connection failed"], + "next_contact" => ["type" => "datetime", "default" => DBA::NULL_DATETIME, "comment" => "Next connection request"], ], "indexes" => [ "PRIMARY" => ["id"], "nurl" => ["UNIQUE", "nurl(190)"], + "next_contact" => ["next_contact"], ] ], "user" => [ @@ -1496,7 +1498,8 @@ return [ "comment" => "Background tasks queue entries", "fields" => [ "id" => ["type" => "int unsigned", "not null" => "1", "extra" => "auto_increment", "primary" => "1", "comment" => "Auto incremented worker task id"], - "parameter" => ["type" => "mediumtext", "comment" => "Task command"], + "command" => ["type" => "varchar(100)", "comment" => "Task command"], + "parameter" => ["type" => 
"mediumtext", "comment" => "Task parameter"], "priority" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "comment" => "Task priority"], "created" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => "Creation date"], "pid" => ["type" => "int unsigned", "not null" => "1", "default" => "0", "comment" => "Process id of the worker"], @@ -1507,7 +1510,8 @@ return [ ], "indexes" => [ "PRIMARY" => ["id"], - "done_parameter" => ["done", "parameter(64)"], + "command" => ["command"], + "done_command_parameter" => ["done", "command", "parameter(64)"], "done_executed" => ["done", "executed"], "done_priority_retrial_created" => ["done", "priority", "retrial", "created"], "done_priority_next_try" => ["done", "priority", "next_try"], From 8cbe2f025b0c85b8397b6ec6e19106ee7f122e5f Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 3 Dec 2020 15:50:14 +0000 Subject: [PATCH 2/5] Updates database version --- database.sql | 16 ++++++++++------ static/dbstructure.config.php | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/database.sql b/database.sql index b04b088d8..fbfdd70bb 100644 --- a/database.sql +++ b/database.sql @@ -1,6 +1,6 @@ -- ------------------------------------------ -- Friendica 2020.12-dev (Red Hot Poker) --- DB_UPDATE_VERSION 1382 +-- DB_UPDATE_VERSION 1383 -- ------------------------------------------ @@ -26,11 +26,13 @@ CREATE TABLE IF NOT EXISTS `gserver` ( `detection-method` tinyint unsigned COMMENT 'Method that had been used to detect that server', `created` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT '', `last_poco_query` datetime DEFAULT '0001-01-01 00:00:00' COMMENT '', - `last_contact` datetime DEFAULT '0001-01-01 00:00:00' COMMENT '', - `last_failure` datetime DEFAULT '0001-01-01 00:00:00' COMMENT '', + `last_contact` datetime DEFAULT '0001-01-01 00:00:00' COMMENT 'Last successful connection request', + `last_failure` datetime DEFAULT '0001-01-01 00:00:00' COMMENT 
'Last failed connection request', `failed` boolean COMMENT 'Connection failed', + `next_contact` datetime DEFAULT '0001-01-01 00:00:00' COMMENT 'Next connection request', PRIMARY KEY(`id`), - UNIQUE INDEX `nurl` (`nurl`(190)) + UNIQUE INDEX `nurl` (`nurl`(190)), + INDEX `next_contact` (`next_contact`) ) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Global servers'; -- @@ -1464,7 +1466,8 @@ CREATE TABLE IF NOT EXISTS `worker-ipc` ( -- CREATE TABLE IF NOT EXISTS `workerqueue` ( `id` int unsigned NOT NULL auto_increment COMMENT 'Auto incremented worker task id', - `parameter` mediumtext COMMENT 'Task command', + `command` varchar(100) COMMENT 'Task command', + `parameter` mediumtext COMMENT 'Task parameter', `priority` tinyint unsigned NOT NULL DEFAULT 0 COMMENT 'Task priority', `created` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT 'Creation date', `pid` int unsigned NOT NULL DEFAULT 0 COMMENT 'Process id of the worker', @@ -1473,7 +1476,8 @@ CREATE TABLE IF NOT EXISTS `workerqueue` ( `retrial` tinyint NOT NULL DEFAULT 0 COMMENT 'Retrial counter', `done` boolean NOT NULL DEFAULT '0' COMMENT 'Marked 1 when the task was done - will be deleted later', PRIMARY KEY(`id`), - INDEX `done_parameter` (`done`,`parameter`(64)), + INDEX `command` (`command`), + INDEX `done_command_parameter` (`done`,`command`,`parameter`(64)), INDEX `done_executed` (`done`,`executed`), INDEX `done_priority_retrial_created` (`done`,`priority`,`retrial`,`created`), INDEX `done_priority_next_try` (`done`,`priority`,`next_try`), diff --git a/static/dbstructure.config.php b/static/dbstructure.config.php index e413822d7..9c414f112 100644 --- a/static/dbstructure.config.php +++ b/static/dbstructure.config.php @@ -55,7 +55,7 @@ use Friendica\Database\DBA; if (!defined('DB_UPDATE_VERSION')) { - define('DB_UPDATE_VERSION', 1382); + define('DB_UPDATE_VERSION', 1383); } return [ From 8d999f54d025cebcaeef9386ae9910def7af52c3 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 3 Dec 2020 22:32:51 +0000 
Subject: [PATCH 3/5] Check and add a server --- src/Model/GServer.php | 95 +++++++------------------------- src/Worker/UpdateGServer.php | 18 +++++- src/Worker/UpdateGServers.php | 26 +++++++-- src/Worker/UpdateServerPeers.php | 4 +- static/defaults.config.php | 4 ++ 5 files changed, 63 insertions(+), 84 deletions(-) diff --git a/src/Model/GServer.php b/src/Model/GServer.php index c035abc73..0b79b3f8f 100644 --- a/src/Model/GServer.php +++ b/src/Model/GServer.php @@ -70,6 +70,22 @@ class GServer const DETECT_NODEINFO_1 = 101; const DETECT_NODEINFO_2 = 102; + /** + * Check for the existance of a server and adds it in the background if not existant + * + * @param string $url + * @param boolean $only_nodeinfo + * @return void + */ + public static function add(string $url, bool $only_nodeinfo = false) + { + if (self::getID($url, false)) { + return; + } + + Worker::add(PRIORITY_LOW, 'UpdateGServer', $url, $only_nodeinfo); + } + /** * Get the ID for the given server URL * @@ -173,61 +189,6 @@ class GServer return DateTimeFormat::utc('now +3 month'); } - /** - * Decides if a server needs to be updated, based upon several date fields - * - * @param date $created Creation date of that server entry - * @param date $updated When had the server entry be updated - * @param date $last_failure Last failure when contacting that server - * @param date $last_contact Last time the server had been contacted - * - * @return boolean Does the server record needs an update? 
- */ - public static function updateNeeded($created, $updated, $last_failure, $last_contact) - { - $now = strtotime(DateTimeFormat::utcNow()); - - if ($updated > $last_contact) { - $contact_time = strtotime($updated); - } else { - $contact_time = strtotime($last_contact); - } - - $failure_time = strtotime($last_failure); - $created_time = strtotime($created); - - // If there is no "created" time then use the current time - if ($created_time <= 0) { - $created_time = $now; - } - - // If the last contact was less than 24 hours then don't update - if (($now - $contact_time) < (60 * 60 * 24)) { - return false; - } - - // If the last failure was less than 24 hours then don't update - if (($now - $failure_time) < (60 * 60 * 24)) { - return false; - } - - // If the last contact was less than a week ago and the last failure is older than a week then don't update - //if ((($now - $contact_time) < (60 * 60 * 24 * 7)) && ($contact_time > $failure_time)) - // return false; - - // If the last contact time was more than a week ago and the contact was created more than a week ago, then only try once a week - if ((($now - $contact_time) > (60 * 60 * 24 * 7)) && (($now - $created_time) > (60 * 60 * 24 * 7)) && (($now - $failure_time) < (60 * 60 * 24 * 7))) { - return false; - } - - // If the last contact time was more than a month ago and the contact was created more than a month ago, then only try once a month - if ((($now - $contact_time) > (60 * 60 * 24 * 30)) && (($now - $created_time) > (60 * 60 * 24 * 30)) && (($now - $failure_time) < (60 * 60 * 24 * 30))) { - return false; - } - - return true; - } - /** * Checks the state of the given server. 
* @@ -241,7 +202,6 @@ class GServer public static function check(string $server_url, string $network = '', bool $force = false, bool $only_nodeinfo = false) { $server_url = self::cleanURL($server_url); - if ($server_url == '') { return false; } @@ -254,24 +214,11 @@ class GServer DBA::update('gserver', $fields, $condition); } - $last_contact = $gserver['last_contact']; - $last_failure = $gserver['last_failure']; - - // See discussion under https://forum.friendi.ca/display/0b6b25a8135aabc37a5a0f5684081633 - // It can happen that a zero date is in the database, but storing it again is forbidden. - if ($last_contact < DBA::NULL_DATETIME) { - $last_contact = DBA::NULL_DATETIME; - } - - if ($last_failure < DBA::NULL_DATETIME) { - $last_failure = DBA::NULL_DATETIME; - } - - if (!$force && !self::updateNeeded($gserver['created'], '', $last_failure, $last_contact)) { + if (!$force && (strtotime($gserver['next_contact']) > time())) { Logger::info('No update needed', ['server' => $server_url]); - return ($last_contact >= $last_failure); + return (!$gserver['failed']); } - Logger::info('Server is outdated. Start discovery.', ['Server' => $server_url, 'Force' => $force, 'Created' => $gserver['created'], 'Failure' => $last_failure, 'Contact' => $last_contact]); + Logger::info('Server is outdated. Start discovery.', ['Server' => $server_url, 'Force' => $force]); } else { Logger::info('Server is unknown. Start discovery.', ['Server' => $server_url]); } @@ -1690,7 +1637,7 @@ class GServer if (!empty($data['data']['nodes'])) { foreach ($data['data']['nodes'] as $server) { // Using "only_nodeinfo" since servers that are listed on that page should always have it. - Worker::add(PRIORITY_LOW, 'UpdateGServer', 'https://' . $server['host'], true); + self::add('https://' . $server['host'], true); } } } @@ -1709,7 +1656,7 @@ class GServer foreach ($servers['instances'] as $server) { $url = (is_null($server['https_score']) ? 'http' : 'https') . '://' . 
$server['name']; - Worker::add(PRIORITY_LOW, 'UpdateGServer', $url); + self::add($url); } } } diff --git a/src/Worker/UpdateGServer.php b/src/Worker/UpdateGServer.php index 696ec125d..0c42a67ec 100644 --- a/src/Worker/UpdateGServer.php +++ b/src/Worker/UpdateGServer.php @@ -22,6 +22,7 @@ namespace Friendica\Worker; use Friendica\Core\Logger; +use Friendica\Database\DBA; use Friendica\Model\GServer; use Friendica\Util\Strings; @@ -32,7 +33,7 @@ class UpdateGServer * @param string $server_url Server URL * @param boolean $only_nodeinfo Only use nodeinfo for server detection */ - public static function execute(string $server_url, bool $only_nodeinfo = false, bool $force = false) + public static function execute(string $server_url, bool $only_nodeinfo = false) { if (empty($server_url)) { return; @@ -40,11 +41,22 @@ class UpdateGServer $filtered = filter_var($server_url, FILTER_SANITIZE_URL); if (substr(Strings::normaliseLink($filtered), 0, 7) != 'http://') { - GServer::setFailure($filtered); + GServer::setFailure($server_url); return; } - $ret = GServer::check($filtered, '', $force, $only_nodeinfo); + if (($filtered != $server_url) && DBA::exists('gserver', ['nurl' => Strings::normaliseLink($server_url)])) { + GServer::setFailure($server_url); + return; + } + + $cleaned = GServer::cleanURL($server_url); + if (($cleaned != $server_url) && DBA::exists('gserver', ['nurl' => Strings::normaliseLink($server_url)])) { + GServer::setFailure($server_url); + return; + } + + $ret = GServer::check($filtered, '', true, $only_nodeinfo); Logger::info('Updated gserver', ['url' => $filtered, 'result' => $ret]); } } diff --git a/src/Worker/UpdateGServers.php b/src/Worker/UpdateGServers.php index 12022a346..486dbc93c 100644 --- a/src/Worker/UpdateGServers.php +++ b/src/Worker/UpdateGServers.php @@ -24,33 +24,49 @@ namespace Friendica\Worker; use Friendica\Core\Logger; use Friendica\Core\Worker; use Friendica\Database\DBA; +use Friendica\DI; +use Friendica\Util\Strings; class 
UpdateGServers { /** - * Updates up to 100 servers + * Updates a defined number of servers */ public static function execute() { + $update_limit = DI::config()->get('system', 'gserver_update_limit'); + if (empty($update_limit)) { + return; + } + $updating = Worker::countWorkersByCommand('UpdateGServer'); - $limit = 100 - $updating; + $limit = $update_limit - $updating; if ($limit <= 0) { Logger::info('The number of currently running jobs exceed the limit'); return; } - $outdated = DBA::count('gserver', ["`next_contact` < UTC_TIMESTAMP()"]); $total = DBA::count('gserver'); + $condition = ["`next_contact` < UTC_TIMESTAMP() AND (`nurl` != ? OR `url` != ?)", '', '']; + $outdated = DBA::count('gserver', $condition); Logger::info('Server status', ['total' => $total, 'outdated' => $outdated, 'updating' => $limit]); - $gservers = DBA::select('gserver', ['url'], ["`next_contact` < UTC_TIMESTAMP()"], ['limit' => $limit]); + $gservers = DBA::select('gserver', ['url', 'nurl'], $condition, ['limit' => $limit]); if (!DBA::isResult($gservers)) { return; } $count = 0; while ($gserver = DBA::fetch($gservers)) { - Worker::add(PRIORITY_LOW, 'UpdateGServer', $gserver['url'], false, true); + // Sometimes the "nurl" and "url" doesn't seem to fit, see https://forum.friendi.ca/display/ec054ce7-155f-c94d-6159-f50372664245 + // There are duplicated "url" but not "nurl". So we check both addresses instead of just overwriting them, + // since that would mean loosing data. 
+ if (!empty($gserver['url'])) { + Worker::add(PRIORITY_LOW, 'UpdateGServer', $gserver['url']); + } + if (!empty($gserver['nurl']) && ($gserver['nurl'] != Strings::normaliseLink($gserver['url']))) { + Worker::add(PRIORITY_LOW, 'UpdateGServer', $gserver['nurl']); + } $count++; } DBA::close($gservers); diff --git a/src/Worker/UpdateServerPeers.php b/src/Worker/UpdateServerPeers.php index ff0cdfa73..d7dc7562c 100644 --- a/src/Worker/UpdateServerPeers.php +++ b/src/Worker/UpdateServerPeers.php @@ -22,9 +22,9 @@ namespace Friendica\Worker; use Friendica\Core\Logger; -use Friendica\Core\Worker; use Friendica\Database\DBA; use Friendica\DI; +use Friendica\Model\GServer; use Friendica\Util\Strings; class UpdateServerPeers @@ -58,7 +58,7 @@ class UpdateServerPeers continue; } // This endpoint doesn't offer the schema. So we assume that it is HTTPS. - Worker::add(PRIORITY_LOW, 'UpdateGServer', 'https://' . $peer); + GServer::add('https://' . $peer); ++$added; } Logger::info('Server peer update ended', ['total' => $total, 'added' => $added, 'url' => $url]); diff --git a/static/defaults.config.php b/static/defaults.config.php index e7d7985aa..053811b7f 100644 --- a/static/defaults.config.php +++ b/static/defaults.config.php @@ -254,6 +254,10 @@ return [ // This can alternatively be set on a per account basis in the pconfig table. 'groupedit_image_limit' => 400, + // gserver_update_limit (Integer) + // How much servers should be checked at a time? + 'gserver_update_limit' => 100, + // hsts (Boolean) // Enables the sending of HTTP Strict Transport Security headers. 
'hsts' => false, From 9f96f3ef347e44851a265b1d99e5bb6cf2f4514b Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 4 Dec 2020 05:53:11 +0000 Subject: [PATCH 4/5] Moved updating personal contacts to updatecontact --- src/Model/Contact.php | 40 ++++++++------ src/Module/Contact.php | 4 +- src/Worker/Cron.php | 2 +- src/Worker/OnePoll.php | 2 +- src/Worker/PollContacts.php | 3 +- ...ePublicContacts.php => UpdateContacts.php} | 54 ++++++++++--------- static/defaults.config.php | 4 ++ 7 files changed, 62 insertions(+), 47 deletions(-) rename src/Worker/{UpdatePublicContacts.php => UpdateContacts.php} (63%) diff --git a/src/Model/Contact.php b/src/Model/Contact.php index 99e34abd2..4c173c1be 100644 --- a/src/Model/Contact.php +++ b/src/Model/Contact.php @@ -1722,31 +1722,38 @@ class Contact } // Search for duplicated contacts and get rid of them - if (self::removeDuplicates(Strings::normaliseLink($url), $uid) || ($uid != 0)) { + if (self::removeDuplicates(Strings::normaliseLink($url), $uid)) { return; } - // Archive or unarchive the contact. We only need to do this for the public contact. - // The archive/unarchive function will update the personal contacts by themselves. + // Archive or unarchive the contact. $contact = DBA::selectFirst('contact', [], ['id' => $id]); if (!DBA::isResult($contact)) { Logger::info('Couldn\'t select contact for archival.', ['id' => $id]); return; } - if (!empty($fields['success_update'])) { - self::unmarkForArchival($contact); - } elseif (!empty($fields['failure_update'])) { - self::markForArchival($contact); + if (isset($fields['failed'])) { + if ($fields['failed']) { + self::markForArchival($contact); + } else { + self::unmarkForArchival($contact); + } } - $condition = ['self' => false, 'nurl' => Strings::normaliseLink($url), 'network' => Protocol::FEDERATED]; + if ($contact['uid'] != 0) { + return; + } - // These contacts are sharing with us, we don't poll them. - // This means that we don't set the update fields in "OnePoll.php". 
- $condition['rel'] = self::SHARING; + // Update contact data for all users + $condition = ['self' => false, 'nurl' => Strings::normaliseLink($url)]; + + $condition['network'] = [Protocol::DFRN, Protocol::DIASPORA, Protocol::ACTIVITYPUB]; DBA::update('contact', $fields, $condition); + // We mustn't set the update fields for OStatus contacts since they are updated in OnePoll + $condition['network'] = Protocol::OSTATUS; + // If the contact failed, propagate the update fields to all contacts if (empty($fields['failed'])) { unset($fields['last-update']); @@ -1758,8 +1765,6 @@ class Contact return; } - // We are polling these contacts, so we mustn't set the update fields here. - $condition['rel'] = [self::FOLLOWER, self::FRIEND]; DBA::update('contact', $fields, $condition); } @@ -1957,7 +1962,7 @@ class Contact $ret['name-date'] = $updated; } - if ($uid == 0) { + if (($uid == 0) || in_array($ret['network'], [Protocol::DFRN, Protocol::DIASPORA, Protocol::ACTIVITYPUB])) { $ret['last-update'] = $updated; $ret['success_update'] = $updated; } @@ -2224,8 +2229,11 @@ class Contact self::updateAvatar($contact_id, $ret['photo']); // pull feed and consume it, which should subscribe to the hub. - - Worker::add(PRIORITY_HIGH, "OnePoll", $contact_id, "force"); + if ($contact['network'] == Protocol::OSTATUS) { + Worker::add(PRIORITY_HIGH, 'OnePoll', $contact_id, 'force'); + } else { + Worker::add(PRIORITY_HIGH, 'UpdateContact', $contact_id); + } $owner = User::getOwnerDataById($user['uid']); diff --git a/src/Module/Contact.php b/src/Module/Contact.php index 4d4508a8c..f82b7d3cc 100644 --- a/src/Module/Contact.php +++ b/src/Module/Contact.php @@ -181,9 +181,11 @@ class Contact extends BaseModule if ($result['success']) { DBA::update('contact', ['subhub' => 1], ['id' => $contact_id]); } - } else { + // pull feed and consume it, which should subscribe to the hub. 
Worker::add(PRIORITY_HIGH, 'OnePoll', $contact_id, 'force'); + } else { + Worker::add(PRIORITY_HIGH, 'UpdateContact', $contact_id); } } diff --git a/src/Worker/Cron.php b/src/Worker/Cron.php index c47e4bd20..18d9de592 100644 --- a/src/Worker/Cron.php +++ b/src/Worker/Cron.php @@ -61,7 +61,7 @@ class Cron Worker::add(PRIORITY_MEDIUM, 'PollContacts'); // Update contact information - Worker::add(PRIORITY_LOW, 'UpdatePublicContacts'); + Worker::add(PRIORITY_LOW, 'UpdateContacts'); // Update server information Worker::add(PRIORITY_LOW, 'UpdateGServers'); diff --git a/src/Worker/OnePoll.php b/src/Worker/OnePoll.php index 73971a8b1..bb3dd9719 100644 --- a/src/Worker/OnePoll.php +++ b/src/Worker/OnePoll.php @@ -56,7 +56,7 @@ class OnePoll // We never probe mail contacts since their probing demands a mail from the contact in the inbox. // We don't probe feed accounts by default since they are polled in a higher frequency, but forced probes are okay. - if (!in_array($contact['network'], [Protocol::MAIL, Protocol::FEED]) || ($force && ($contact['network'] == Protocol::FEED))) { + if ($force && ($contact['network'] == Protocol::FEED)) { $success = Contact::updateFromProbe($contact_id); } else { $success = true; diff --git a/src/Worker/PollContacts.php b/src/Worker/PollContacts.php index 078cf202a..b3ac529de 100644 --- a/src/Worker/PollContacts.php +++ b/src/Worker/PollContacts.php @@ -41,8 +41,7 @@ class PollContacts $abandon_days = 0; } - $condition = ['network' => [Protocol::DFRN, Protocol::ACTIVITYPUB, Protocol::OSTATUS, Protocol::FEED, - Protocol::MAIL, Protocol::ZOT, Protocol::PHANTOM], 'self' => false, 'blocked' => false]; + $condition = ['network' => [Protocol::FEED, Protocol::MAIL, Protocol::OSTATUS], 'self' => false, 'blocked' => false]; if (!empty($abandon_days)) { $condition = DBA::mergeConditions($condition, diff --git a/src/Worker/UpdatePublicContacts.php b/src/Worker/UpdateContacts.php similarity index 63% rename from src/Worker/UpdatePublicContacts.php rename to 
src/Worker/UpdateContacts.php index 939d9fa8d..2328c7c84 100644 --- a/src/Worker/UpdatePublicContacts.php +++ b/src/Worker/UpdateContacts.php @@ -29,52 +29,54 @@ use Friendica\DI; use Friendica\Util\DateTimeFormat; /** - * Update public contacts + * Update federated contacts */ -class UpdatePublicContacts +class UpdateContacts { public static function execute() { $count = 0; $ids = []; - $base_condition = ['network' => Protocol::FEDERATED, 'uid' => 0, 'self' => false]; + $base_condition = ['network' => array_merge(Protocol::FEDERATED, [Protocol::ZOT, Protocol::PHANTOM]), 'self' => false]; - $existing = Worker::countWorkersByCommand('UpdateContact'); - Logger::info('Already existing jobs', ['existing' => $existing]); - if ($existing > 100) { + $update_limit = DI::config()->get('system', 'contact_update_limit'); + if (empty($update_limit)) { return; } - $limit = 100 - $existing; - - if (!DI::config()->get('system', 'update_active_contacts')) { - $part = 3; - // Add every contact (mostly failed ones) that hadn't been updated for six months - $condition = DBA::mergeConditions($base_condition, - ["`last-update` < ?", DateTimeFormat::utc('now - 6 month')]); - $ids = self::getContactsToUpdate($condition, $ids, round($limit / $part)); - - // Add every non failed contact that hadn't been updated for a month - $condition = DBA::mergeConditions($base_condition, - ["NOT `failed` AND `last-update` < ?", DateTimeFormat::utc('now - 1 month')]); - $ids = self::getContactsToUpdate($condition, $ids, round($limit / $part)); - } else { - $part = 1; + $updating = Worker::countWorkersByCommand('UpdateContact'); + $limit = $update_limit - $updating; + if ($limit <= 0) { + Logger::info('The number of currently running jobs exceed the limit'); + return; } - // Add every contact our system interacted with and hadn't been updated for a week + // Add every contact our system interacted with and hadn't been updated for a week if unarchived + // or for a month if archived. 
$condition = DBA::mergeConditions($base_condition, ["(`id` IN (SELECT `author-id` FROM `item`) OR `id` IN (SELECT `owner-id` FROM `item`) OR `id` IN (SELECT `causer-id` FROM `item`) OR - `id` IN (SELECT `cid` FROM `post-tag`) OR `id` IN (SELECT `cid` FROM `user-contact`)) AND - `last-update` < ?", DateTimeFormat::utc('now - 1 week')]); - $ids = self::getContactsToUpdate($condition, $ids, round($limit / $part)); + `id` IN (SELECT `cid` FROM `post-tag`) OR `id` IN (SELECT `cid` FROM `user-contact`) OR `uid` != ?) AND + (`last-update` < ? OR (NOT `archive` AND `last-update` < ?))", + 0, DateTimeFormat::utc('now - 1 month'), DateTimeFormat::utc('now - 1 week')]); + $ids = self::getContactsToUpdate($condition, $ids, $limit - count($ids)); + + Logger::info('Fetched interacting federated contacts', ['count' => count($ids)]); + + if (!DI::config()->get('system', 'update_active_contacts')) { + // Add every contact (mostly failed ones) that hadn't been updated for six months + // and every non failed contact that hadn't been updated for a month + $condition = DBA::mergeConditions($base_condition, + ["(`last-update` < ? OR (NOT `archive` AND `last-update` < ?))", + DateTimeFormat::utc('now - 6 month'), DateTimeFormat::utc('now - 1 month')]); + $ids = self::getContactsToUpdate($condition, $ids, $limit - count($ids)); + } foreach ($ids as $id) { Worker::add(PRIORITY_LOW, "UpdateContact", $id); ++$count; } - Logger::info('Initiated update for public contacts', ['count' => $count]); + Logger::info('Initiated update for federated contacts', ['count' => $count]); } /** diff --git a/static/defaults.config.php b/static/defaults.config.php index 053811b7f..455272926 100644 --- a/static/defaults.config.php +++ b/static/defaults.config.php @@ -131,6 +131,10 @@ return [ // Don't display sharing accounts on the global community 'community_no_sharer' => false, + // contact_update_limit (Integer) + // How many contacts should be checked at a time? 
+ 'contact_update_limit' => 100, + // cron_interval (Integer) // Minimal period in minutes between two calls of the "Cron" worker job. 'cron_interval' => 5, From 8e9a9b2049617a61beca85c5f03534aff09ecae7 Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 4 Dec 2020 07:54:29 +0000 Subject: [PATCH 5/5] Improved logging --- src/Worker/UpdateContacts.php | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Worker/UpdateContacts.php b/src/Worker/UpdateContacts.php index 2328c7c84..bf234d989 100644 --- a/src/Worker/UpdateContacts.php +++ b/src/Worker/UpdateContacts.php @@ -35,8 +35,6 @@ class UpdateContacts { public static function execute() { - $count = 0; - $ids = []; $base_condition = ['network' => array_merge(Protocol::FEDERATED, [Protocol::ZOT, Protocol::PHANTOM]), 'self' => false]; $update_limit = DI::config()->get('system', 'contact_update_limit'); @@ -58,8 +56,8 @@ class UpdateContacts `id` IN (SELECT `cid` FROM `post-tag`) OR `id` IN (SELECT `cid` FROM `user-contact`) OR `uid` != ?) AND (`last-update` < ? OR (NOT `archive` AND `last-update` < ?))", 0, DateTimeFormat::utc('now - 1 month'), DateTimeFormat::utc('now - 1 week')]); - $ids = self::getContactsToUpdate($condition, $ids, $limit - count($ids)); - + Logger::info('Updatable interacting federated contacts', ['count' => DBA::count('contact', $condition)]); + $ids = self::getContactsToUpdate($condition, [], $limit); Logger::info('Fetched interacting federated contacts', ['count' => count($ids)]); if (!DI::config()->get('system', 'update_active_contacts')) { @@ -68,9 +66,13 @@ class UpdateContacts $condition = DBA::mergeConditions($base_condition, ["(`last-update` < ? 
OR (NOT `archive` AND `last-update` < ?))", DateTimeFormat::utc('now - 6 month'), DateTimeFormat::utc('now - 1 month')]); - $ids = self::getContactsToUpdate($condition, $ids, $limit - count($ids)); + Logger::info('Updatable federated contacts', ['count' => DBA::count('contact', $condition)]); + $previous = count($ids); + $ids = self::getContactsToUpdate($condition, $ids, $limit - $previous); + Logger::info('Fetched federated contacts', ['count' => count($ids) - $previous]); } + $count = 0; foreach ($ids as $id) { Worker::add(PRIORITY_LOW, "UpdateContact", $id); ++$count;