diff --git a/include/api.php b/include/api.php index 543c01827..a7d25c83f 100644 --- a/include/api.php +++ b/include/api.php @@ -614,7 +614,7 @@ function api_get_user(App $a, $contact_id = null) 'name' => $contact["name"], 'screen_name' => (($contact['nick']) ? $contact['nick'] : $contact['name']), 'location' => ($contact["location"] != "") ? $contact["location"] : ContactSelector::networkToName($contact['network'], $contact['url']), - 'description' => HTML::toPlaintext(BBCode::toPlaintext($contact["about"])), + 'description' => BBCode::toPlaintext($contact["about"]), 'profile_image_url' => $contact["micro"], 'profile_image_url_https' => $contact["micro"], 'profile_image_url_profile_size' => $contact["thumb"], @@ -693,7 +693,7 @@ function api_get_user(App $a, $contact_id = null) 'name' => (($uinfo[0]['name']) ? $uinfo[0]['name'] : $uinfo[0]['nick']), 'screen_name' => (($uinfo[0]['nick']) ? $uinfo[0]['nick'] : $uinfo[0]['name']), 'location' => $location, - 'description' => HTML::toPlaintext(BBCode::toPlaintext($description)), + 'description' => BBCode::toPlaintext($description), 'profile_image_url' => $uinfo[0]['micro'], 'profile_image_url_https' => $uinfo[0]['micro'], 'profile_image_url_profile_size' => $uinfo[0]["thumb"], diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index b2d4ebb5d..b012e79fb 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -357,10 +357,7 @@ class BBCode extends BaseObject */ public static function toPlaintext($text, $keep_urls = true) { - $naked_text = preg_replace('/\[.+?\]/','', $text); - if (!$keep_urls) { - $naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text); - } + $naked_text = HTML::toPlaintext(BBCode::convert($text, false, 0, true), 0, !$keep_urls); return $naked_text; } diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index 9f614a90c..a4829c8c3 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -56,6 +56,7 @@ class HTML $xpath = new DOMXPath($doc); + /** @var \DOMNode[] $list */ $list = $xpath->query("//" . $tag); foreach ($list as $node) { $attr = []; @@ -98,9 +99,12 @@ class HTML $node->parentNode->insertBefore($StartCode, $node); if ($node->hasChildNodes()) { + /** @var \DOMNode $child */ foreach ($node->childNodes as $child) { - $newNode = $child->cloneNode(true); - $node->parentNode->insertBefore($newNode, $node); + if (trim($child->nodeValue)) { + $newNode = $child->cloneNode(true); + $node->parentNode->insertBefore($newNode, $node); + } } } @@ -560,6 +564,8 @@ class HTML $ignore = false; } + $ignore = $ignore || strpos($treffer[1], '#') === 0; + if (!$ignore) { $urls[$treffer[1]] = $treffer[1]; } @@ -583,7 +589,7 @@ class HTML $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); - @$doc->loadHTML($message); + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS); $message = $doc->saveHTML(); // Remove eventual UTF-8 BOM @@ -592,7 +598,7 @@ class HTML // Collecting all links $urls = self::collectURLs($message); - @$doc->loadHTML($message); + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS); self::tagToBBCode($doc, 'html', [], '', ''); self::tagToBBCode($doc, 'body', [], '', ''); diff --git a/src/Module/Debug/Babel.php b/src/Module/Debug/Babel.php index be10da7ea..b9b629f07 100644 --- a/src/Module/Debug/Babel.php +++ b/src/Module/Debug/Babel.php @@ -148,6 +148,12 @@ class Babel extends BaseModule 'content' => htmlspecialchars($html2) ]; + $bbcode2plain = Text\BBCode::toPlaintext($bbcode); + $results[] = [ + 'title' => L10n::t('HTML::toBBCode => BBCode::toPlaintext'), + 'content' => '
' . $bbcode2plain . '
' + ]; + $markdown = Text\HTML::toMarkdown($html); $results[] = [ 'title' => L10n::t('HTML::toMarkdown'), @@ -162,7 +168,7 @@ class Babel extends BaseModule $text = Text\HTML::toPlaintext($html, 0, true); $results[] = [ - 'title' => L10n::t('HTML::toPlaintext'), + 'title' => L10n::t('HTML::toPlaintext (compact)'), 'content' => '
' . $text . '
' ]; } diff --git a/tests/datasets/content/text/html/bug-7457.html b/tests/datasets/content/text/html/bug-7457.html new file mode 100644 index 000000000..4a2d4b33c --- /dev/null +++ b/tests/datasets/content/text/html/bug-7457.html @@ -0,0 +1 @@ +

[1.0.4] - 2019-08-01

Fixed

\ No newline at end of file diff --git a/tests/datasets/content/text/html/bug-7457.txt b/tests/datasets/content/text/html/bug-7457.txt new file mode 100644 index 000000000..051071d55 --- /dev/null +++ b/tests/datasets/content/text/html/bug-7457.txt @@ -0,0 +1,5 @@ +*[1.0.4] - 2019-08-01* + +*Fixed* + +* Invalid SemVer version generation, when the current branch does not have commits ahead of tag/checked out on a tag \ No newline at end of file diff --git a/tests/src/Content/Text/HTMLTest.php b/tests/src/Content/Text/HTMLTest.php new file mode 100644 index 000000000..65ae05249 --- /dev/null +++ b/tests/src/Content/Text/HTMLTest.php @@ -0,0 +1,53 @@ +setUpVfsDir(); + $this->mockApp($this->root); + } + + public function dataHTML() + { + $inputFiles = glob(__DIR__ . '/../../../datasets/content/text/html/*.html'); + + $data = []; + + foreach ($inputFiles as $file) { + $data[str_replace('.html', '', $file)] = [ + 'input' => file_get_contents($file), + 'expected' => file_get_contents(str_replace('.html', '.txt', $file)) + ]; + } + + return $data; + } + + /** + * Test convert different input Markdown text into HTML + * + * @dataProvider dataHTML + * + * @param string $input The Markdown text to test + * @param string $expected The expected HTML output + * @throws \Exception + */ + public function testToPlaintext($input, $expected) + { + $output = HTML::toPlaintext($input, 0); + + $this->assertEquals($expected, $output); + } +} diff --git a/tests/src/Content/Text/MarkdownTest.php b/tests/src/Content/Text/MarkdownTest.php index e39b46b2c..80421b522 100644 --- a/tests/src/Content/Text/MarkdownTest.php +++ b/tests/src/Content/Text/MarkdownTest.php @@ -1,52 +1,52 @@ -setUpVfsDir(); - $this->mockApp($this->root); - } - - public function dataMarkdown() - { - $inputFiles = glob(__DIR__ . '/../../../datasets/content/text/markdown/*.md'); - - $data = []; - - foreach ($inputFiles as $file) { - $data[str_replace('.md', '', $file)] = [ - 'input' => file_get_contents($file), - 'expected' => file_get_contents(str_replace('.md', '.html', $file)) - ]; - } - - return $data; - } - - /** - * Test convert different input Markdown text into HTML - * @dataProvider dataMarkdown - * - * @param string $input The Markdown text to test - * @param string $expected The expected HTML output - * @throws \Exception - */ - public function testConvert($input, $expected) - { - $output = Markdown::convert($input); - - $this->assertEquals($expected, $output); - } -} \ No newline at end of file +setUpVfsDir(); + $this->mockApp($this->root); + } + + public function dataMarkdown() + { + $inputFiles = glob(__DIR__ . '/../../../datasets/content/text/markdown/*.md'); + + $data = []; + + foreach ($inputFiles as $file) { + $data[str_replace('.md', '', $file)] = [ + 'input' => file_get_contents($file), + 'expected' => file_get_contents(str_replace('.md', '.html', $file)) + ]; + } + + return $data; + } + + /** + * Test convert different input Markdown text into HTML + * @dataProvider dataMarkdown + * + * @param string $input The Markdown text to test + * @param string $expected The expected HTML output + * @throws \Exception + */ + public function testConvert($input, $expected) + { + $output = Markdown::convert($input); + + $this->assertEquals($expected, $output); + } +}