diff --git a/composer.json b/composer.json index fc26674ca7..452d3cb66d 100644 --- a/composer.json +++ b/composer.json @@ -46,6 +46,7 @@ "michelf/php-markdown": "^1.7", "minishlink/web-push": "^6.0", "mobiledetect/mobiledetectlib": "^3.74", + "mpratt/embera": "~2.0", "nikic/fast-route": "^1.3", "npm-asset/chart.js": "^2.8", "npm-asset/cropperjs": "1.2.2", diff --git a/composer.lock b/composer.lock index ab66a33d05..e8515e95e4 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "e93a8ac7e31cf3e5e0ca76134e5ffa0b", + "content-hash": "4dc343e8c8b0edf62a64b2e285fce26f", "packages": [ { "name": "asika/simple-console", @@ -1771,6 +1771,77 @@ ], "time": "2023-10-27T16:28:04+00:00" }, + { + "name": "mpratt/embera", + "version": "2.0.42", + "source": { + "type": "git", + "url": "https://github.com/mpratt/Embera.git", + "reference": "afa728339c6f078c803c9277a5054ca241b3c469" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/mpratt/Embera/zipball/afa728339c6f078c803c9277a5054ca241b3c469", + "reference": "afa728339c6f078c803c9277a5054ca241b3c469", + "shasum": "" + }, + "require": { + "ext-json": "*", + "php": ">=5.6" + }, + "require-dev": { + "phpstan/phpstan": "^1.4", + "phpunit/phpunit": "^9.0||^10.0", + "symfony/yaml": "^2.1" + }, + "suggest": { + "ext-curl": "Fetch data using curl instead of using file_get_contents" + }, + "type": "library", + "autoload": { + "psr-4": { + "Embera\\": "src/Embera" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Michael Pratt", + "email": "yo@michael-pratt.com", + "homepage": "http://www.michael-pratt.com", + "role": "Author/Developer" + } + ], + "description": "Oembed consumer library. Converts urls into their html embed code. Supports 150+ sites, such as Youtube, Twitter, vimeo, Instagram etc.", + "homepage": "https://github.com/mpratt/Embera", + "keywords": [ + "Auto embed", + "Embed Text", + "Responsive Embeds", + "Url Embed", + "embed", + "instagram", + "oembed", + "twitter", + "vimeo", + "vine", + "youtube" + ], + "support": { + "issues": "https://github.com/mpratt/Embera/issues", + "source": "https://github.com/mpratt/Embera/tree/2.0.42" + }, + "funding": [ + { + "url": "https://paypal.me/mtpratt", + "type": "paypal" + } + ], + "time": "2025-01-04T06:07:59+00:00" + }, { "name": "nikic/fast-route", "version": "v1.3.0", diff --git a/src/Util/ParseUrl.php b/src/Util/ParseUrl.php index f4f290cc69..ea6259c452 100644 --- a/src/Util/ParseUrl.php +++ b/src/Util/ParseUrl.php @@ -9,6 +9,7 @@ namespace Friendica\Util; use DOMDocument; use DOMXPath; +use DOMElement; use Friendica\Content\Text\HTML; use Friendica\Protocol\HTTP\MediaType; use Friendica\Core\Hook; @@ -19,6 +20,7 @@ use Friendica\Network\HTTPClient\Client\HttpClientAccept; use Friendica\Network\HTTPException; use Friendica\Network\HTTPClient\Client\HttpClientOptions; use Friendica\Network\HTTPClient\Client\HttpClientRequest; +use Embera\Embera; /** * Get information about a given URL @@ -449,6 +451,10 @@ class ParseUrl case 'og:type': $siteinfo['pagetype'] = trim($meta_tag['content']); break; + case 'og:video': + case 'og:video:secure_url': + $siteinfo['player']['embed'] = trim($meta_tag['content']); + break; case 'twitter:description': $siteinfo['text'] = trim($meta_tag['content']); break; @@ -458,10 +464,21 @@ class ParseUrl case 'twitter:image': $siteinfo['image'] = $meta_tag['content']; break; + case 'twitter:player': + $siteinfo['player']['embed'] = trim($meta_tag['content']); + break; + case 'twitter:player:width': + $siteinfo['player']['width'] = intval($meta_tag['content']); + break; + case 'twitter:player:height': + $siteinfo['player']['height'] = intval($meta_tag['content']); + break; } } } + $siteinfo = self::getOembedInfo($xpath, $siteinfo); + $list = $xpath->query("//script[@type='application/ld+json']"); foreach ($list as $node) { if (!empty($node->nodeValue)) { @@ -1232,6 +1249,21 @@ class ParseUrl $media['width'] = trim($content); } + $content = JsonLD::fetchElement($jsonld, 'duration'); + if (!empty($content) && is_string($content)) { + $media['duration'] = trim($content); + } + + $content = JsonLD::fetchElement($jsonld, 'contentSize'); + if (!empty($content) && is_string($content)) { + $media['size'] = trim($content); + } + + $content = JsonLD::fetchElement($jsonld, 'uploadDate'); + if (!empty($content) && is_string($content)) { + $media['uploaded'] = trim($content); + } + $content = JsonLD::fetchElement($jsonld, 'image'); if (!empty($content) && is_string($content)) { $media['image'] = trim($content); @@ -1250,4 +1282,143 @@ class ParseUrl $siteinfo[$name][] = $media; return $siteinfo; } + + /** + * Fetch additional information via oEmbed + * + * @param DOMXPath $xpath + * @param array $siteinfo + * + * @return array siteinfo + */ + private static function getOembedInfo(DOMXPath $xpath, array $siteinfo): array + { + $oembed = ''; + foreach ($xpath->query("//link[@type='application/json+oembed']") as $link) { + /** @var DOMElement $link */ + $href = $link->getAttributeNode('href')->nodeValue; + $oembed = $href; + DI::logger()->debug('Found oEmbed JSON', ['url' => $href]); + } + + if (empty($oembed)) { + $embera = new Embera(); + $urldata = $embera->getUrlData([$siteinfo['url']]); + if (empty($urldata)) { + return $siteinfo; + } + $data = current($urldata); + DI::logger()->debug('Found oEmbed JSON from Embera', ['url' => $siteinfo['url']]); + } else { + $result = DI::httpClient()->get($oembed, HttpClientAccept::DEFAULT, [HttpClientOptions::REQUEST => HttpClientRequest::SITEINFO]); + if (!$result->isSuccess()) { + return $siteinfo; + } + $json_string = $result->getBodyString(); + if (empty($json_string)) { + return $siteinfo; + } + + $data = json_decode($json_string, true); + } + + if (empty($data) || !is_array($data)) { + return $siteinfo; + } + + // Youtube provides only basic information to some IP ranges. + // We can detect this by checking if the host is youtube.com and if there is no player information. + // In this case we remove all tainted information provided by Youtube and use the ones provided by OEmbed. + if (parse_url(Strings::normaliseLink($siteinfo['url']), PHP_URL_HOST) == 'youtube.com') { + if (empty($siteinfo['player'])) { + $fields = ['keywords', 'text', 'title', 'author_name', 'author_url', 'publisher_name', 'publisher_url', 'image']; + foreach ($fields as $field) { + unset($siteinfo[$field]); + } + } + } + + $fields = [ + 'title' => 'title', + 'author_name' => 'author_name', + 'author_url' => 'author_url', + 'publisher_name' => 'provider_name', + 'publisher_url' => 'provider_url', + 'image' => 'thumbnail_url', + ]; + + foreach ($fields as $key => $value) { + if (empty($siteinfo[$key]) && !empty($data[$value])) { + $siteinfo[$key] = $data[$value]; + } + } + + if (!empty($data['html']) && empty($siteinfo['player'])) { + $siteinfo = self::setPlayer($data['html'], $siteinfo); + } + + if (!empty($siteinfo['player'])) { + $fields = [ + 'width' => 'width', + 'height' => 'height', + ]; + foreach ($fields as $key => $value) { + if (empty($siteinfo['player'][$key]) && !empty($data[$value])) { + $siteinfo['player'][$key] = $data[$value]; + } + } + } + + return $siteinfo; + } + + /** + * Set the player information from the oEmbed HTML in case that it contains an iframe + * + * @param string $html + * @param array $siteinfo + * + * @return array siteinfo + */ + private static function setPlayer(String $html, array $siteinfo): array + { + $dom = new DOMDocument(); + if (!@$dom->loadHTML($html)) { + return $siteinfo; + } + + $xpath = new DOMXPath($dom); + + $nodes = $xpath->query('/html/body/*'); + if ($nodes->length !== 1) { + return $siteinfo; + } + + /** @var DOMElement $iframe */ + $iframe = $nodes->item(0); + if ($iframe->nodeName !== 'iframe') { + return $siteinfo; + } + + $src = $iframe->getAttributeNode('src')->nodeValue; + if (empty($src)) { + return $siteinfo; + } + + $siteinfo['player']['embed'] = $src; + + $width = $iframe->getAttributeNode('width')->nodeValue ?? null; + if (!empty($width) && is_numeric($width)) { + $siteinfo['player']['width'] = $width; + } + + $height = $iframe->getAttributeNode('height')->nodeValue ?? null; + if (!empty($height) && is_numeric($height)) { + $siteinfo['player']['height'] = $height; + } + + DI::logger()->debug('Found oEmbed iframe', ['embed' => $siteinfo['player']['embed'] ?? '', 'width' => $siteinfo['player']['width'] ?? '', 'height' => $siteinfo['player']['height'] ?? '']); + + return $siteinfo; + } }