From 25b3fa83fc25912ea03638fe1ac1aaaaa28f58be Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Fri, 17 Jul 2020 19:15:43 -0400 Subject: [PATCH 1/4] Rename PageInfo::appendToBody to searchAndAppendToBody --- src/Content/PageInfo.php | 2 +- src/Module/Debug/Babel.php | 2 +- src/Protocol/Diaspora.php | 4 ++-- src/Protocol/OStatus.php | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Content/PageInfo.php b/src/Content/PageInfo.php index 642c57938..478e7a490 100644 --- a/src/Content/PageInfo.php +++ b/src/Content/PageInfo.php @@ -40,7 +40,7 @@ class PageInfo * @return string * @throws HTTPException\InternalServerErrorException */ - public static function appendToBody(string $body, bool $searchNakedUrls = false, bool $no_photos = false) + public static function searchAndAppendToBody(string $body, bool $searchNakedUrls = false, bool $no_photos = false) { Logger::info('add_page_info_to_body: fetch page info for body', ['body' => $body]); diff --git a/src/Module/Debug/Babel.php b/src/Module/Debug/Babel.php index 2954bc010..ee9dae305 100644 --- a/src/Module/Debug/Babel.php +++ b/src/Module/Debug/Babel.php @@ -115,7 +115,7 @@ class Babel extends BaseModule 'content' => visible_whitespace(var_export($tags, true)), ]; - $body2 = PageInfo::appendToBody($bbcode, true); + $body2 = PageInfo::searchAndAppendToBody($bbcode, true); $results[] = [ 'title' => DI::l10n()->t('PageInfo::appendToBody'), 'content' => visible_whitespace($body2) diff --git a/src/Protocol/Diaspora.php b/src/Protocol/Diaspora.php index 0dfcf6f77..bd99b361e 100644 --- a/src/Protocol/Diaspora.php +++ b/src/Protocol/Diaspora.php @@ -2622,7 +2622,7 @@ class Diaspora $item["body"] = self::replacePeopleGuid($item["body"], $item["author-link"]); // Add OEmbed and other information to the body - $item["body"] = PageInfo::appendToBody($item["body"], false, true); + $item["body"] = PageInfo::searchAndAppendToBody($item["body"], false, true); return $item; } else { @@ -2986,7 +2986,7 @@ class 
Diaspora // Add OEmbed and other information to the body if (!self::isHubzilla($contact["url"])) { - $body = PageInfo::appendToBody($body, false, true); + $body = PageInfo::searchAndAppendToBody($body, false, true); } } diff --git a/src/Protocol/OStatus.php b/src/Protocol/OStatus.php index fedf0f253..9a52476b5 100644 --- a/src/Protocol/OStatus.php +++ b/src/Protocol/OStatus.php @@ -698,7 +698,7 @@ class OStatus // Only add additional data when there is no picture in the post if (!strstr($item["body"], '[/img]')) { - $item["body"] = PageInfo::appendToBody($item["body"]); + $item["body"] = PageInfo::searchAndAppendToBody($item["body"]); } Tag::storeFromBody($item['uri-id'], $item['body']); From 886cf400369289d5ef91fabe6d67008209350dca Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Fri, 17 Jul 2020 19:18:27 -0400 Subject: [PATCH 2/4] Ensure ParseUrl::getSiteinfo always returns the url and type keys --- src/Util/ParseUrl.php | 51 ++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/src/Util/ParseUrl.php b/src/Util/ParseUrl.php index 62b5d007d..b6d172a3a 100644 --- a/src/Util/ParseUrl.php +++ b/src/Util/ParseUrl.php @@ -55,14 +55,13 @@ class ParseUrl * to avoid endless loops * * @return array which contains needed data for embedding - * string 'url' => The url of the parsed page - * string 'type' => Content type - * string 'title' => The title of the content - * string 'text' => The description for the content - * string 'image' => A preview image of the content (only available - * if $no_geuessing = false - * array'images' = Array of preview pictures - * string 'keywords' => The tags which belong to the content + * string 'url' => The url of the parsed page + * string 'type' => Content type + * string 'title' => (optional) The title of the content + * string 'text' => (optional) The description for the content + * string 'image' => (optional) A preview image of the content (only available if $no_guessing = 
false) + * array 'images' => (optional) Array of preview pictures + * string 'keywords' => (optional) The tags which belong to the content * * @throws \Friendica\Network\HTTPException\InternalServerErrorException * @see ParseUrl::getSiteinfo() for more information about scraping @@ -115,14 +114,13 @@ class ParseUrl * @param int $count Internal counter to avoid endless loops * * @return array which contains needed data for embedding - * string 'url' => The url of the parsed page - * string 'type' => Content type - * string 'title' => The title of the content - * string 'text' => The description for the content - * string 'image' => A preview image of the content (only available - * if $no_geuessing = false - * array'images' = Array of preview pictures - * string 'keywords' => The tags which belong to the content + * string 'url' => The url of the parsed page + * string 'type' => Content type + * string 'title' => (optional) The title of the content + * string 'text' => (optional) The description for the content + * string 'image' => (optional) A preview image of the content (only available if $no_guessing = false) + * array 'images' => (optional) Array of preview pictures + * string 'keywords' => (optional) The tags which belong to the content * * @throws \Friendica\Network\HTTPException\InternalServerErrorException * @todo https://developers.google.com/+/plugins/snippet/ @@ -140,28 +138,27 @@ class ParseUrl */ public static function getSiteinfo($url, $no_guessing = false, $do_oembed = true, $count = 1) { - $siteinfo = []; - // Check if the URL does contain a scheme $scheme = parse_url($url, PHP_URL_SCHEME); if ($scheme == '') { - $url = 'http://' . trim($url, '/'); + $url = 'http://' . ltrim($url, '/'); } + $url = trim($url, "'\""); + + $url = Network::stripTrackingQueryParams($url); + + $siteinfo = [ + 'url' => $url, + 'type' => 'link', + ]; + if ($count > 10) { Logger::log('Endless loop detected for ' . 
$url, Logger::DEBUG); return $siteinfo; } - $url = trim($url, "'"); - $url = trim($url, '"'); - - $url = Network::stripTrackingQueryParams($url); - - $siteinfo['url'] = $url; - $siteinfo['type'] = 'link'; - $curlResult = Network::curl($url); if (!$curlResult->isSuccess()) { return $siteinfo; From 972b65ba33f65be3fee4b6201304702c51b22ed3 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Fri, 17 Jul 2020 19:38:28 -0400 Subject: [PATCH 3/4] Add intermediate method PageInfo::appendDataToBody - It handles the already existing attachment in the body case --- src/Content/PageInfo.php | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/Content/PageInfo.php b/src/Content/PageInfo.php index 478e7a490..212082f9f 100644 --- a/src/Content/PageInfo.php +++ b/src/Content/PageInfo.php @@ -49,14 +49,34 @@ class PageInfo return $body; } - $footer = self::getFooterFromUrl($url, $no_photos); - if (!$footer) { + $data = self::queryUrl($url); + if (!$data) { return $body; } - $body = self::stripTrailingUrlFromBody($body, $url); + return self::appendDataToBody($body, $data, $no_photos); + } - $body .= "\n" . $footer; + /** + * @param string $body Body text the page info is added to + * @param array $data Page info data; 'url' is read, 'title' is used when present and non-empty + * @param bool $no_photos Passed through to PageInfo::getFooterFromData + * @return string The resulting body + * @throws HTTPException\InternalServerErrorException + */ + public static function appendDataToBody(string $body, array $data, bool $no_photos = false) + { + // Only one [attachment] tag per body is allowed + $existingAttachmentPos = strpos($body, '[attachment'); + if ($existingAttachmentPos !== false) { + $linkTitle = ($data['title'] ?? '') ?: $data['url']; // 'title' is optional: fall back to the URL when it is missing or empty + // Additional link attachments are prepended before the existing [attachment] tag + $body = substr_replace($body, "\n[bookmark=" . $data['url'] . ']' . $linkTitle . "[/bookmark]\n", $existingAttachmentPos, 0); + } else { + $footer = PageInfo::getFooterFromData($data, $no_photos); + $body = self::stripTrailingUrlFromBody($body, $data['url']); + $body .= "\n" . 
$footer; + } return $body; } From 911a23f18b7861127504b91eecd3974f5e36b976 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Fri, 17 Jul 2020 19:39:12 -0400 Subject: [PATCH 4/4] Use PageInfo::appendDataToBody in ActivityPub\Processor::constructAttachList --- src/Protocol/ActivityPub/Processor.php | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/Protocol/ActivityPub/Processor.php b/src/Protocol/ActivityPub/Processor.php index 745a56c2a..e4cef1704 100644 --- a/src/Protocol/ActivityPub/Processor.php +++ b/src/Protocol/ActivityPub/Processor.php @@ -21,6 +21,7 @@ namespace Friendica\Protocol\ActivityPub; +use Friendica\Content\PageInfo; use Friendica\Content\Text\BBCode; use Friendica\Content\Text\HTML; use Friendica\Core\Logger; @@ -96,18 +97,16 @@ class Processor foreach ($activity['attachments'] as $attach) { switch ($attach['type']) { case 'link': - // Only one [attachment] tag is allowed - $existingAttachmentPos = strpos($item['body'], '[attachment'); - if ($existingAttachmentPos !== false) { - $linkTitle = $attach['title'] ?: $attach['url']; - // Additional link attachments are prepended before the existing [attachment] tag - $item['body'] = substr_replace($item['body'], "\n[bookmark=" . $attach['url'] . ']' . $linkTitle . "[/bookmark]\n", $existingAttachmentPos, 0); - } else { - // Strip the link preview URL from the end of the body if any - $quotedUrl = preg_quote($attach['url'], '#'); - $item['body'] = preg_replace("#\s*(?:\[bookmark={$quotedUrl}].+?\[/bookmark]|\[url={$quotedUrl}].+?\[/url]|\[url]{$quotedUrl}\[/url]|{$quotedUrl})\s*$#", '', $item['body']); - $item['body'] .= "\n[attachment type='link' url='" . $attach['url'] . "' title='" . htmlspecialchars($attach['title'] ?? '', ENT_QUOTES) . "' image='" . ($attach['image'] ?? '') . "']" . ($attach['desc'] ?? '') . '[/attachment]'; - } + $data = [ + 'url' => $attach['url'], + 'type' => $attach['type'], + 'title' => $attach['title'] ?? 
'', + 'text' => $attach['desc'] ?? '', + 'image' => $attach['image'] ?? '', + 'images' => [], + 'keywords' => [], + ]; + $item['body'] = PageInfo::appendDataToBody($item['body'], $data); break; default: $filetype = strtolower(substr($attach['mediaType'], 0, strpos($attach['mediaType'], '/')));