From 0bc7b89530e7f91f4876c6ada0a27d0d4288780c Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Thu, 4 Jun 2020 19:30:28 -0400 Subject: [PATCH] Replace existing block escaping by performWithEscaped* calls --- src/Content/Text/BBCode.php | 1231 +++++++++++++++++------------------ src/Content/Text/HTML.php | 464 +++++++------ 2 files changed, 832 insertions(+), 863 deletions(-) diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index 0c718757b..d7afa1cd2 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -1256,643 +1256,626 @@ class BBCode { $a = DI::app(); - /* - * preg_match_callback function to replace potential Oembed tags with Oembed content - * - * $match[0] = [tag]$url[/tag] or [tag=$url]$title[/tag] - * $match[1] = $url - * $match[2] = $title or absent - */ - $try_oembed_callback = function ($match) - { - $url = $match[1]; - $title = $match[2] ?? null; + $text = self::performWithEscapedTags($text, ['code'], function ($text) use ($try_oembed, $simple_html, $for_plaintext, $a) { + $text = self::performWithEscapedTags($text, ['noparse', 'nobb', 'pre'], function ($text) use ($try_oembed, $simple_html, $for_plaintext, $a) { + /* + * preg_match_callback function to replace potential Oembed tags with Oembed content + * + * $match[0] = [tag]$url[/tag] or [tag=$url]$title[/tag] + * $match[1] = $url + * $match[2] = $title or absent + */ + $try_oembed_callback = function ($match) + { + $url = $match[1]; + $title = $match[2] ?? null; - try { - $return = OEmbed::getHTML($url, $title); - } catch (Exception $ex) { - $return = $match[0]; - } - - return $return; - }; - - // Extracting code blocks before the whitespace processing and the autolinker - $codeblocks = []; - - $text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#ism", - function ($matches) use (&$codeblocks) { - $return = '#codeblock-' . count($codeblocks) . '#'; - if (strpos($matches[2], "\n") !== false) { - $codeblocks[] = '
' . htmlspecialchars(trim($matches[2], "\n\r"), ENT_NOQUOTES, 'UTF-8') . '
'; - } else { - $codeblocks[] = '' . htmlspecialchars($matches[2], ENT_NOQUOTES, 'UTF-8') . ''; - } - - return $return; - }, - $text - ); - - // Hide all [noparse] contained bbtags by spacefying them - // POSSIBLE BUG --> Will the 'preg' functions crash if there's an embedded image? - - $text = preg_replace_callback("/\[noparse\](.*?)\[\/noparse\]/ism", 'self::escapeNoparseCallback', $text); - $text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'self::escapeNoparseCallback', $text); - $text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'self::escapeNoparseCallback', $text); - - // Remove the abstract element. It is a non visible element. - $text = self::stripAbstract($text); - - // Move all spaces out of the tags - $text = preg_replace("/\[(\w*)\](\s*)/ism", '$2[$1]', $text); - $text = preg_replace("/(\s*)\[\/(\w*)\]/ism", '[/$2]$1', $text); - - // Extract the private images which use data urls since preg has issues with - // large data sizes. Stash them away while we do bbcode conversion, and then put them back - // in after we've done all the regex matching. We cannot use any preg functions to do this. - - $extracted = self::extractImagesFromItemBody($text); - $text = $extracted['body']; - $saved_image = $extracted['images']; - - // If we find any event code, turn it into an event. - // After we're finished processing the bbcode we'll - // replace all of the event code with a reformatted version. 
- - $ev = Event::fromBBCode($text); - - // Replace any html brackets with HTML Entities to prevent executing HTML or script - // Don't use strip_tags here because it breaks [url] search by replacing & with amp - - $text = str_replace("<", "<", $text); - $text = str_replace(">", ">", $text); - - // remove some newlines before the general conversion - $text = preg_replace("/\s?\[share(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "[share$1]$2[/share]", $text); - $text = preg_replace("/\s?\[quote(.*?)\]\s?(.*?)\s?\[\/quote\]\s?/ism", "[quote$1]$2[/quote]", $text); - - // when the content is meant exporting to other systems then remove the avatar picture since this doesn't really look good on these systems - if (!$try_oembed) { - $text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $text); - } - - // Convert new line chars to html
tags - - // nlbr seems to be hopelessly messed up - // $Text = nl2br($Text); - - // We'll emulate it. - - $text = trim($text); - $text = str_replace("\r\n", "\n", $text); - - // Remove linefeeds inside of the table elements. See issue #6799 - $search = ["\n[th]", "[th]\n", " [th]", "\n[/th]", "[/th]\n", "[/th] ", - "\n[td]", "[td]\n", " [td]", "\n[/td]", "[/td]\n", "[/td] ", - "\n[tr]", "[tr]\n", " [tr]", "[tr] ", "\n[/tr]", "[/tr]\n", " [/tr]", "[/tr] ", - "[table]\n", "[table] ", " [table]", "\n[/table]", " [/table]", "[/table] "]; - $replace = ["[th]", "[th]", "[th]", "[/th]", "[/th]", "[/th]", - "[td]", "[td]", "[td]", "[/td]", "[/td]", "[/td]", - "[tr]", "[tr]", "[tr]", "[tr]", "[/tr]", "[/tr]", "[/tr]", "[/tr]", - "[table]", "[table]", "[table]", "[/table]", "[/table]", "[/table]"]; - do { - $oldtext = $text; - $text = str_replace($search, $replace, $text); - } while ($oldtext != $text); - - // Replace these here only once - $search = ["\n[table]", "[/table]\n"]; - $replace = ["[table]", "[/table]"]; - $text = str_replace($search, $replace, $text); - - // removing multiplicated newlines - if (DI::config()->get('system', 'remove_multiplicated_lines')) { - $search = ["\n\n\n", "\n ", " \n", "[/quote]\n\n", "\n[/quote]", "[/li]\n", "\n[li]", "\n[ul]", "[/ul]\n", "\n\n[share ", "[/attachment]\n", - "\n[h1]", "[/h1]\n", "\n[h2]", "[/h2]\n", "\n[h3]", "[/h3]\n", "\n[h4]", "[/h4]\n", "\n[h5]", "[/h5]\n", "\n[h6]", "[/h6]\n"]; - $replace = ["\n\n", "\n", "\n", "[/quote]\n", "[/quote]", "[/li]", "[li]", "[ul]", "[/ul]", "\n[share ", "[/attachment]", - "[h1]", "[/h1]", "[h2]", "[/h2]", "[h3]", "[/h3]", "[h4]", "[/h4]", "[h5]", "[/h5]", "[h6]", "[/h6]"]; - do { - $oldtext = $text; - $text = str_replace($search, $replace, $text); - } while ($oldtext != $text); - } - - /// @todo Have a closer look at the different html modes - // Handle attached links or videos - if ($simple_html == self::ACTIVITYPUB) { - $text = self::removeAttachment($text); - } elseif 
(!in_array($simple_html, [self::INTERNAL, self::CONNECTORS])) { - $text = self::removeAttachment($text, true); - } else { - $text = self::convertAttachment($text, $simple_html, $try_oembed); - } - - // leave open the posibility of [map=something] - // this is replaced in Item::prepareBody() which has knowledge of the item location - if (strpos($text, '[/map]') !== false) { - $text = preg_replace_callback( - "/\[map\](.*?)\[\/map\]/ism", - function ($match) use ($simple_html) { - return str_replace($match[0], '

' . Map::byLocation($match[1], $simple_html) . '

', $match[0]); - }, - $text - ); - } - - if (strpos($text, '[map=') !== false) { - $text = preg_replace_callback( - "/\[map=(.*?)\]/ism", - function ($match) use ($simple_html) { - return str_replace($match[0], '

' . Map::byCoordinates(str_replace('/', ' ', $match[1]), $simple_html) . '

', $match[0]); - }, - $text - ); - } - - if (strpos($text, '[map]') !== false) { - $text = preg_replace("/\[map\]/", '

', $text); - } - - // Check for headers - $text = preg_replace("(\[h1\](.*?)\[\/h1\])ism", '

$1

', $text); - $text = preg_replace("(\[h2\](.*?)\[\/h2\])ism", '

$1

', $text); - $text = preg_replace("(\[h3\](.*?)\[\/h3\])ism", '

$1

', $text); - $text = preg_replace("(\[h4\](.*?)\[\/h4\])ism", '

$1

', $text); - $text = preg_replace("(\[h5\](.*?)\[\/h5\])ism", '
$1
', $text); - $text = preg_replace("(\[h6\](.*?)\[\/h6\])ism", '
$1
', $text); - - // Check for paragraph - $text = preg_replace("(\[p\](.*?)\[\/p\])ism", '

$1

', $text); - - // Check for bold text - $text = preg_replace("(\[b\](.*?)\[\/b\])ism", '$1', $text); - - // Check for Italics text - $text = preg_replace("(\[i\](.*?)\[\/i\])ism", '$1', $text); - - // Check for Underline text - $text = preg_replace("(\[u\](.*?)\[\/u\])ism", '$1', $text); - - // Check for strike-through text - $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '$1', $text); - - // Check for over-line text - $text = preg_replace("(\[o\](.*?)\[\/o\])ism", '$1', $text); - - // Check for colored text - $text = preg_replace("(\[color=(.*?)\](.*?)\[\/color\])ism", "$2", $text); - - // Check for sized text - // [size=50] --> font-size: 50px (with the unit). - if ($simple_html != self::DIASPORA) { - $text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", "$2", $text); - $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); - } else { - // Issue 2199: Diaspora doesn't interpret the construct above, nor the or element - $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); - } - - - // Check for centered text - $text = preg_replace("(\[center\](.*?)\[\/center\])ism", "
$1
", $text); - - // Check for list text - $text = str_replace("[*]", "
  • ", $text); - - // Check for style sheet commands - $text = preg_replace_callback( - "(\[style=(.*?)\](.*?)\[\/style\])ism", - function ($match) { - return "" . $match[2] . ""; - }, - $text - ); - - // Check for CSS classes - $text = preg_replace_callback( - "(\[class=(.*?)\](.*?)\[\/class\])ism", - function ($match) { - return "" . $match[2] . ""; - }, - $text - ); - - // handle nested lists - $endlessloop = 0; - - while ((((strpos($text, "[/list]") !== false) && (strpos($text, "[list") !== false)) || - ((strpos($text, "[/ol]") !== false) && (strpos($text, "[ol]") !== false)) || - ((strpos($text, "[/ul]") !== false) && (strpos($text, "[ul]") !== false)) || - ((strpos($text, "[/li]") !== false) && (strpos($text, "[li]") !== false))) && (++$endlessloop < 20)) { - $text = preg_replace("/\[list\](.*?)\[\/list\]/ism", '
      $1
    ', $text); - $text = preg_replace("/\[list=\](.*?)\[\/list\]/ism", '
      $1
    ', $text); - $text = preg_replace("/\[list=1\](.*?)\[\/list\]/ism", '
      $1
    ', $text); - $text = preg_replace("/\[list=((?-i)i)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); - $text = preg_replace("/\[list=((?-i)I)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); - $text = preg_replace("/\[list=((?-i)a)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); - $text = preg_replace("/\[list=((?-i)A)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); - $text = preg_replace("/\[ul\](.*?)\[\/ul\]/ism", '
      $1
    ', $text); - $text = preg_replace("/\[ol\](.*?)\[\/ol\]/ism", '
      $1
    ', $text); - $text = preg_replace("/\[li\](.*?)\[\/li\]/ism", '
  • $1
  • ', $text); - } - - $text = preg_replace("/\[th\](.*?)\[\/th\]/sm", '$1', $text); - $text = preg_replace("/\[td\](.*?)\[\/td\]/sm", '$1', $text); - $text = preg_replace("/\[tr\](.*?)\[\/tr\]/sm", '$1', $text); - $text = preg_replace("/\[table\](.*?)\[\/table\]/sm", '$1
    ', $text); - - $text = preg_replace("/\[table border=1\](.*?)\[\/table\]/sm", '$1
    ', $text); - $text = preg_replace("/\[table border=0\](.*?)\[\/table\]/sm", '$1
    ', $text); - - $text = str_replace('[hr]', '
    ', $text); - - if (!$for_plaintext) { - $escaped = []; - - // Escaping BBCodes susceptible to contain rogue URL we don'' want the autolinker to catch - $text = preg_replace_callback('#\[(url|img|audio|video|youtube|vimeo|share|attachment|iframe|bookmark).+?\[/\1\]#ism', - function ($matches) use (&$escaped) { - $return = '{escaped-' . count($escaped) . '}'; - $escaped[] = $matches[0]; + try { + $return = OEmbed::getHTML($url, $title); + } catch (Exception $ex) { + $return = $match[0]; + } return $return; - }, - $text - ); - - // Autolinker for isolated URLs - $text = preg_replace(Strings::autoLinkRegEx(), '[url]$1[/url]', $text); - - // Restoring escaped blocks - $text = preg_replace_callback('/{escaped-([0-9]+)}/iU', - function ($matches) use ($escaped) { - return $escaped[intval($matches[1])] ?? $matches[0]; - }, - $text - ); - } - - // This is actually executed in Item::prepareBody() - - $nosmile = strpos($text, '[nosmile]') !== false; - $text = str_replace('[nosmile]', '', $text); - - // Check for font change text - $text = preg_replace("/\[font=(.*?)\](.*?)\[\/font\]/sm", "$2", $text); - - // Declare the format for [spoiler] layout - $SpoilerLayout = '
    ' . DI::l10n()->t('Click to open/close') . '$1
    '; - - // Check for [spoiler] text - // handle nested quotes - $endlessloop = 0; - while ((strpos($text, "[/spoiler]") !== false) && (strpos($text, "[spoiler]") !== false) && (++$endlessloop < 20)) { - $text = preg_replace("/\[spoiler\](.*?)\[\/spoiler\]/ism", $SpoilerLayout, $text); - } - - // Check for [spoiler=Title] text - - // handle nested quotes - $endlessloop = 0; - while ((strpos($text, "[/spoiler]")!== false) && (strpos($text, "[spoiler=") !== false) && (++$endlessloop < 20)) { - $text = preg_replace("/\[spoiler=[\"\']*(.*?)[\"\']*\](.*?)\[\/spoiler\]/ism", - '
    $1$2
    ', - $text); - } - - // Declare the format for [quote] layout - $QuoteLayout = '
    $1
    '; - - // Check for [quote] text - // handle nested quotes - $endlessloop = 0; - while ((strpos($text, "[/quote]") !== false) && (strpos($text, "[quote]") !== false) && (++$endlessloop < 20)) { - $text = preg_replace("/\[quote\](.*?)\[\/quote\]/ism", "$QuoteLayout", $text); - } - - // Check for [quote=Author] text - - $t_wrote = DI::l10n()->t('$1 wrote:'); - - // handle nested quotes - $endlessloop = 0; - while ((strpos($text, "[/quote]")!== false) && (strpos($text, "[quote=") !== false) && (++$endlessloop < 20)) { - $text = preg_replace("/\[quote=[\"\']*(.*?)[\"\']*\](.*?)\[\/quote\]/ism", - "

    " . $t_wrote . "

    $2
    ", - $text); - } + }; - // [img=widthxheight]image source[/img] - $text = preg_replace_callback( - "/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", - function ($matches) use ($simple_html) { - if (strpos($matches[3], "data:image/") === 0) { - return $matches[0]; + + // Remove the abstract element. It is a non visible element. + $text = self::stripAbstract($text); + + // Move all spaces out of the tags + $text = preg_replace("/\[(\w*)\](\s*)/ism", '$2[$1]', $text); + $text = preg_replace("/(\s*)\[\/(\w*)\]/ism", '[/$2]$1', $text); + + // Extract the private images which use data urls since preg has issues with + // large data sizes. Stash them away while we do bbcode conversion, and then put them back + // in after we've done all the regex matching. We cannot use any preg functions to do this. + + $extracted = self::extractImagesFromItemBody($text); + $text = $extracted['body']; + $saved_image = $extracted['images']; + + // If we find any event code, turn it into an event. + // After we're finished processing the bbcode we'll + // replace all of the event code with a reformatted version. 
+ + $ev = Event::fromBBCode($text); + + // Replace any html brackets with HTML Entities to prevent executing HTML or script + // Don't use strip_tags here because it breaks [url] search by replacing & with amp + + $text = str_replace("<", "<", $text); + $text = str_replace(">", ">", $text); + + // remove some newlines before the general conversion + $text = preg_replace("/\s?\[share(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "[share$1]$2[/share]", $text); + $text = preg_replace("/\s?\[quote(.*?)\]\s?(.*?)\s?\[\/quote\]\s?/ism", "[quote$1]$2[/quote]", $text); + + // when the content is meant exporting to other systems then remove the avatar picture since this doesn't really look good on these systems + if (!$try_oembed) { + $text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $text); } - $matches[3] = self::proxyUrl($matches[3], $simple_html); - return "[img=" . $matches[1] . "x" . $matches[2] . "]" . $matches[3] . "[/img]"; - }, - $text - ); + // Convert new line chars to html
    tags - $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '', $text); - $text = preg_replace("/\[zmg\=([0-9]*)x([0-9]*)\](.*?)\[\/zmg\]/ism", '', $text); + // nlbr seems to be hopelessly messed up + // $Text = nl2br($Text); - $text = preg_replace_callback("/\[img\=(.*?)\](.*?)\[\/img\]/ism", - function ($matches) use ($simple_html) { - $matches[1] = self::proxyUrl($matches[1], $simple_html); - $matches[2] = htmlspecialchars($matches[2], ENT_COMPAT); - return '' . $matches[2] . ''; - }, - $text); + // We'll emulate it. - // Images - // [img]pathtoimage[/img] - $text = preg_replace_callback( - "/\[img\](.*?)\[\/img\]/ism", - function ($matches) use ($simple_html) { - if (strpos($matches[1], "data:image/") === 0) { - return $matches[0]; + $text = trim($text); + $text = str_replace("\r\n", "\n", $text); + + // Remove linefeeds inside of the table elements. See issue #6799 + $search = ["\n[th]", "[th]\n", " [th]", "\n[/th]", "[/th]\n", "[/th] ", + "\n[td]", "[td]\n", " [td]", "\n[/td]", "[/td]\n", "[/td] ", + "\n[tr]", "[tr]\n", " [tr]", "[tr] ", "\n[/tr]", "[/tr]\n", " [/tr]", "[/tr] ", + "[table]\n", "[table] ", " [table]", "\n[/table]", " [/table]", "[/table] "]; + $replace = ["[th]", "[th]", "[th]", "[/th]", "[/th]", "[/th]", + "[td]", "[td]", "[td]", "[/td]", "[/td]", "[/td]", + "[tr]", "[tr]", "[tr]", "[tr]", "[/tr]", "[/tr]", "[/tr]", "[/tr]", + "[table]", "[table]", "[table]", "[/table]", "[/table]", "[/table]"]; + do { + $oldtext = $text; + $text = str_replace($search, $replace, $text); + } while ($oldtext != $text); + + // Replace these here only once + $search = ["\n[table]", "[/table]\n"]; + $replace = ["[table]", "[/table]"]; + $text = str_replace($search, $replace, $text); + + // removing multiplicated newlines + if (DI::config()->get('system', 'remove_multiplicated_lines')) { + $search = ["\n\n\n", "\n ", " \n", "[/quote]\n\n", "\n[/quote]", "[/li]\n", "\n[li]", "\n[ul]", "[/ul]\n", "\n\n[share ", "[/attachment]\n", + "\n[h1]", 
"[/h1]\n", "\n[h2]", "[/h2]\n", "\n[h3]", "[/h3]\n", "\n[h4]", "[/h4]\n", "\n[h5]", "[/h5]\n", "\n[h6]", "[/h6]\n"]; + $replace = ["\n\n", "\n", "\n", "[/quote]\n", "[/quote]", "[/li]", "[li]", "[ul]", "[/ul]", "\n[share ", "[/attachment]", + "[h1]", "[/h1]", "[h2]", "[/h2]", "[h3]", "[/h3]", "[h4]", "[/h4]", "[h5]", "[/h5]", "[h6]", "[/h6]"]; + do { + $oldtext = $text; + $text = str_replace($search, $replace, $text); + } while ($oldtext != $text); } - $matches[1] = self::proxyUrl($matches[1], $simple_html); - return "[img]" . $matches[1] . "[/img]"; - }, - $text - ); - - $text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); - $text = preg_replace("/\[zmg\](.*?)\[\/zmg\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); - - $text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); - $text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); - //$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $Text); - - // Simplify "video" element - $text = preg_replace('(\[video.*?\ssrc\s?=\s?([^\s\]]+).*?\].*?\[/video\])ism', '[video]$1[/video]', $text); - - // Try to Oembed - if ($try_oembed) { - $text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism", '', $text); - $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '', $text); - - $text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text); - $text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text); - } else { - $text = preg_replace("/\[video\](.*?)\[\/video\]/ism", - '$1', $text); - $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", - '$1', $text); - } - - // html5 video and audio - - - if ($try_oembed) { - $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '', $text); - } else { - $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '$1', $text); - } - - // Youtube extensions - if ($try_oembed) { - $text = preg_replace_callback("/\[youtube\](https?:\/\/www.youtube.com\/watch\?v\=.*?)\[\/youtube\]/ism", $try_oembed_callback, $text); - $text = preg_replace_callback("/\[youtube\](www.youtube.com\/watch\?v\=.*?)\[\/youtube\]/ism", $try_oembed_callback, $text); - $text = preg_replace_callback("/\[youtube\](https?:\/\/youtu.be\/.*?)\[\/youtube\]/ism", $try_oembed_callback, $text); - } - - $text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $text); - $text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $text); - $text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $text); - - if ($try_oembed) { - $text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '', $text); - } else { - $text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", - 'https://www.youtube.com/watch?v=$1', 
$text); - } - - if ($try_oembed) { - $text = preg_replace_callback("/\[vimeo\](https?:\/\/player.vimeo.com\/video\/[0-9]+).*?\[\/vimeo\]/ism", $try_oembed_callback, $text); - $text = preg_replace_callback("/\[vimeo\](https?:\/\/vimeo.com\/[0-9]+).*?\[\/vimeo\]/ism", $try_oembed_callback, $text); - } - - $text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism", '[vimeo]$1[/vimeo]', $text); - $text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism", '[vimeo]$1[/vimeo]', $text); - - if ($try_oembed) { - $text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '', $text); - } else { - $text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", - 'https://vimeo.com/$1', $text); - } - - // oembed tag - $text = OEmbed::BBCode2HTML($text); - - // Avoid triple linefeeds through oembed - $text = str_replace("


    ", "

    ", $text); - - // If we found an event earlier, strip out all the event code and replace with a reformatted version. - // Replace the event-start section with the entire formatted event. The other bbcode is stripped. - // Summary (e.g. title) is required, earlier revisions only required description (in addition to - // start which is always required). Allow desc with a missing summary for compatibility. - - if ((!empty($ev['desc']) || !empty($ev['summary'])) && !empty($ev['start'])) { - $sub = Event::getHTML($ev, $simple_html); - - $text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism", '', $text); - $text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism", '', $text); - $text = preg_replace("/\[event\-start\](.*?)\[\/event\-start\]/ism", $sub, $text); - $text = preg_replace("/\[event\-finish\](.*?)\[\/event\-finish\]/ism", '', $text); - $text = preg_replace("/\[event\-location\](.*?)\[\/event\-location\]/ism", '', $text); - $text = preg_replace("/\[event\-adjust\](.*?)\[\/event\-adjust\]/ism", '', $text); - $text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism", '', $text); - } - - // Replace non graphical smilies for external posts - if (!$nosmile && !$for_plaintext) { - $text = Smilies::replace($text); - } - - if (!$for_plaintext && DI::config()->get('system', 'big_emojis') && ($simple_html != self::DIASPORA)) { - $conv = html_entity_decode(str_replace([' ', "\n", "\r"], '', $text)); - // Emojis are always 4 byte Unicode characters - if (!empty($conv) && (strlen($conv) / mb_strlen($conv) == 4)) { - $text = '' . $text . 
''; - } - } - - if (!$for_plaintext) { - if (in_array($simple_html, [self::OSTATUS, self::ACTIVITYPUB])) { - $text = preg_replace_callback("/\[url\](.*?)\[\/url\]/ism", 'self::convertUrlForActivityPubCallback', $text); - $text = preg_replace_callback("/\[url\=(.*?)\](.*?)\[\/url\]/ism", 'self::convertUrlForActivityPubCallback', $text); - } - } else { - $text = preg_replace("(\[url\](.*?)\[\/url\])ism", " $1 ", $text); - $text = preg_replace_callback("&\[url=([^\[\]]*)\]\[img\](.*)\[\/img\]\[\/url\]&Usi", 'self::removePictureLinksCallback', $text); - } - - $text = str_replace(["\r","\n"], ['
    ', '
    '], $text); - - // Remove all hashtag addresses - if ($simple_html && !in_array($simple_html, [self::DIASPORA, self::OSTATUS, self::ACTIVITYPUB])) { - $text = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '$1$3', $text); - } elseif ($simple_html == self::DIASPORA) { - // The ! is converted to @ since Diaspora only understands the @ - $text = preg_replace("/([@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", - '@$3', - $text); - } elseif (in_array($simple_html, [self::OSTATUS, self::ACTIVITYPUB])) { - $text = preg_replace("/([@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", - '$1$3', - $text); - } elseif (!$simple_html) { - $text = preg_replace("/([@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", - '$1$3', - $text); - } - - // Bookmarks in red - will be converted to bookmarks in friendica - $text = preg_replace("/#\^\[url\](.*?)\[\/url\]/ism", '[bookmark=$1]$1[/bookmark]', $text); - $text = preg_replace("/#\^\[url\=(.*?)\](.*?)\[\/url\]/ism", '[bookmark=$1]$2[/bookmark]', $text); - $text = preg_replace("/#\[url\=.*?\]\^\[\/url\]\[url\=(.*?)\](.*?)\[\/url\]/i", - "[bookmark=$1]$2[/bookmark]", $text); - - if (in_array($simple_html, [self::API, self::OSTATUS, self::TWITTER])) { - $text = preg_replace_callback("/([^#@!])\[url\=([^\]]*)\](.*?)\[\/url\]/ism", "self::expandLinksCallback", $text); - //$Text = preg_replace("/[^#@!]\[url\=([^\]]*)\](.*?)\[\/url\]/ism", ' $2 [url]$1[/url]', $Text); - $text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", ' $2 [url]$1[/url]',$text); - } - - // Perform URL Search - if ($try_oembed) { - $text = preg_replace_callback("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", $try_oembed_callback, $text); - } - - $text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", '[url=$1]$2[/url]', $text); - - // Handle Diaspora posts - $text = preg_replace_callback( - "&\[url=/?posts/([^\[\]]*)\](.*)\[\/url\]&Usi", - function ($match) { - return "[url=" . DI::baseUrl() . "/display/" . $match[1] . "]" . $match[2] . 
"[/url]"; - }, $text - ); - - $text = preg_replace_callback( - "&\[url=/people\?q\=(.*)\](.*)\[\/url\]&Usi", - function ($match) { - return "[url=" . DI::baseUrl() . "/search?search=%40" . $match[1] . "]" . $match[2] . "[/url]"; - }, $text - ); - - // Server independent link to posts and comments - // See issue: https://github.com/diaspora/diaspora_federation/issues/75 - $expression = "=diaspora://.*?/post/([0-9A-Za-z\-_@.:]{15,254}[0-9A-Za-z])=ism"; - $text = preg_replace($expression, DI::baseUrl()."/display/$1", $text); - - /* Tag conversion - * Supports: - * - #[url=][/url] - * - [url=]#[/url] - */ - $text = preg_replace_callback("/(?:#\[url\=[^\[\]]*\]|\[url\=[^\[\]]*\]#)(.*?)\[\/url\]/ism", function($matches) use ($simple_html) { - if ($simple_html == BBCode::ACTIVITYPUB) { - return '#' - . XML::escape($matches[1]) . ''; - } else { - return '#'; - } - }, $text); - - // We need no target="_blank" rel="noopener noreferrer" for local links - // convert links start with DI::baseUrl() as local link without the target="_blank" rel="noopener noreferrer" attribute - $escapedBaseUrl = preg_quote(DI::baseUrl(), '/'); - $text = preg_replace("/\[url\](".$escapedBaseUrl.".*?)\[\/url\]/ism", '$1', $text); - $text = preg_replace("/\[url\=(".$escapedBaseUrl.".*?)\](.*?)\[\/url\]/ism", '$2', $text); - - $text = preg_replace("/\[url\](.*?)\[\/url\]/ism", '$1', $text); - $text = preg_replace("/\[url\=(.*?)\](.*?)\[\/url\]/ism", '$2', $text); - - // Red compatibility, though the link can't be authenticated on Friendica - $text = preg_replace("/\[zrl\=(.*?)\](.*?)\[\/zrl\]/ism", '$2', $text); - - - // we may need to restrict this further if it picks up too many strays - // link acct:user@host to a webfinger profile redirector - - $text = preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', 'acct:$1@$2', $text); - - // Perform MAIL Search - $text = preg_replace("/\[mail\](.*?)\[\/mail\]/", '$1', $text); - $text = 
preg_replace("/\[mail\=(.*?)\](.*?)\[\/mail\]/", '$2', $text); - - // Unhide all [noparse] contained bbtags unspacefying them - // and triming the [noparse] tag. - - $text = preg_replace_callback("/\[noparse\](.*?)\[\/noparse\]/ism", 'self::unescapeNoparseCallback', $text); - $text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'self::unescapeNoparseCallback', $text); - $text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'self::unescapeNoparseCallback', $text); - - /// @todo What is the meaning of these lines? - $text = preg_replace('/\[\&\;([#a-z0-9]+)\;\]/', '&$1;', $text); - $text = preg_replace('/\&\#039\;/', '\'', $text); - - // Currently deactivated, it made problems with " inside of alt texts. - //$text = preg_replace('/\"\;/', '"', $text); - - // fix any escaped ampersands that may have been converted into links - $text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&\;(.*?)\>/ism', '<$1$2=$3&$4>', $text); - - // sanitizes src attributes (http and redir URLs for displaying in a web page, cid used for inline images in emails) - $allowed_src_protocols = ['//', 'http://', 'https://', 'redir/', 'cid:']; - - array_walk($allowed_src_protocols, function(&$value) { $value = preg_quote($value, '#');}); - - $text = preg_replace('#<([^>]*?)(src)="(?!' . implode('|', $allowed_src_protocols) . ')(.*?)"(.*?)>#ism', - '<$1$2=""$4 data-original-src="$3" class="invalid-src" title="' . DI::l10n()->t('Invalid source protocol') . 
'">', $text); - - // sanitize href attributes (only allowlisted protocols URLs) - // default value for backward compatibility - $allowed_link_protocols = DI::config()->get('system', 'allowed_link_protocols', []); - - // Always allowed protocol even if config isn't set or not including it - $allowed_link_protocols[] = '//'; - $allowed_link_protocols[] = 'http://'; - $allowed_link_protocols[] = 'https://'; - $allowed_link_protocols[] = 'redir/'; - - array_walk($allowed_link_protocols, function(&$value) { $value = preg_quote($value, '#');}); - - $regex = '#<([^>]*?)(href)="(?!' . implode('|', $allowed_link_protocols) . ')(.*?)"(.*?)>#ism'; - $text = preg_replace($regex, '<$1$2="javascript:void(0)"$4 data-original-href="$3" class="invalid-href" title="' . DI::l10n()->t('Invalid link protocol') . '">', $text); - - // Shared content - $text = self::convertShare( - $text, - function (array $attributes, array $author_contact, $content, $is_quote_share) use ($simple_html) { - return self::convertShareCallback($attributes, $author_contact, $content, $is_quote_share, $simple_html); - } - ); - - if ($saved_image) { - $text = self::interpolateSavedImagesIntoItemBody($text, $saved_image); - } - - // Restore code blocks - $text = preg_replace_callback('/#codeblock-([0-9]+)#/iU', - function ($matches) use ($codeblocks) { - $return = $matches[0]; - if (isset($codeblocks[intval($matches[1])])) { - $return = $codeblocks[$matches[1]]; + /// @todo Have a closer look at the different html modes + // Handle attached links or videos + if ($simple_html == self::ACTIVITYPUB) { + $text = self::removeAttachment($text); + } elseif (!in_array($simple_html, [self::INTERNAL, self::CONNECTORS])) { + $text = self::removeAttachment($text, true); + } else { + $text = self::convertAttachment($text, $simple_html, $try_oembed); } + + // leave open the posibility of [map=something] + // this is replaced in Item::prepareBody() which has knowledge of the item location + if (strpos($text, '[/map]') !== 
false) { + $text = preg_replace_callback( + "/\[map\](.*?)\[\/map\]/ism", + function ($match) use ($simple_html) { + return str_replace($match[0], '

    ' . Map::byLocation($match[1], $simple_html) . '

    ', $match[0]); + }, + $text + ); + } + + if (strpos($text, '[map=') !== false) { + $text = preg_replace_callback( + "/\[map=(.*?)\]/ism", + function ($match) use ($simple_html) { + return str_replace($match[0], '

    ' . Map::byCoordinates(str_replace('/', ' ', $match[1]), $simple_html) . '

    ', $match[0]); + }, + $text + ); + } + + if (strpos($text, '[map]') !== false) { + $text = preg_replace("/\[map\]/", '

    ', $text); + } + + // Check for headers + $text = preg_replace("(\[h1\](.*?)\[\/h1\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h2\](.*?)\[\/h2\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h3\](.*?)\[\/h3\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h4\](.*?)\[\/h4\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h5\](.*?)\[\/h5\])ism", '
    $1
    ', $text); + $text = preg_replace("(\[h6\](.*?)\[\/h6\])ism", '
    $1
    ', $text); + + // Check for paragraph + $text = preg_replace("(\[p\](.*?)\[\/p\])ism", '

    $1

    ', $text); + + // Check for bold text + $text = preg_replace("(\[b\](.*?)\[\/b\])ism", '$1', $text); + + // Check for Italics text + $text = preg_replace("(\[i\](.*?)\[\/i\])ism", '$1', $text); + + // Check for Underline text + $text = preg_replace("(\[u\](.*?)\[\/u\])ism", '$1', $text); + + // Check for strike-through text + $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '$1', $text); + + // Check for over-line text + $text = preg_replace("(\[o\](.*?)\[\/o\])ism", '$1', $text); + + // Check for colored text + $text = preg_replace("(\[color=(.*?)\](.*?)\[\/color\])ism", "$2", $text); + + // Check for sized text + // [size=50] --> font-size: 50px (with the unit). + if ($simple_html != self::DIASPORA) { + $text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", "$2", $text); + $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); + } else { + // Issue 2199: Diaspora doesn't interpret the construct above, nor the or element + $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); + } + + + // Check for centered text + $text = preg_replace("(\[center\](.*?)\[\/center\])ism", "
    $1
    ", $text); + + // Check for list text + $text = str_replace("[*]", "
  • ", $text); + + // Check for style sheet commands + $text = preg_replace_callback( + "(\[style=(.*?)\](.*?)\[\/style\])ism", + function ($match) { + return "" . $match[2] . ""; + }, + $text + ); + + // Check for CSS classes + $text = preg_replace_callback( + "(\[class=(.*?)\](.*?)\[\/class\])ism", + function ($match) { + return "" . $match[2] . ""; + }, + $text + ); + + // handle nested lists + $endlessloop = 0; + + while ((((strpos($text, "[/list]") !== false) && (strpos($text, "[list") !== false)) || + ((strpos($text, "[/ol]") !== false) && (strpos($text, "[ol]") !== false)) || + ((strpos($text, "[/ul]") !== false) && (strpos($text, "[ul]") !== false)) || + ((strpos($text, "[/li]") !== false) && (strpos($text, "[li]") !== false))) && (++$endlessloop < 20)) { + $text = preg_replace("/\[list\](.*?)\[\/list\]/ism", '
      $1
    ', $text); + $text = preg_replace("/\[list=\](.*?)\[\/list\]/ism", '
      $1
    ', $text); + $text = preg_replace("/\[list=1\](.*?)\[\/list\]/ism", '
      $1
    ', $text); + $text = preg_replace("/\[list=((?-i)i)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); + $text = preg_replace("/\[list=((?-i)I)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); + $text = preg_replace("/\[list=((?-i)a)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); + $text = preg_replace("/\[list=((?-i)A)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); + $text = preg_replace("/\[ul\](.*?)\[\/ul\]/ism", '
      $1
    ', $text); + $text = preg_replace("/\[ol\](.*?)\[\/ol\]/ism", '
      $1
    ', $text); + $text = preg_replace("/\[li\](.*?)\[\/li\]/ism", '
  • $1
  • ', $text); + } + + $text = preg_replace("/\[th\](.*?)\[\/th\]/sm", '$1', $text); + $text = preg_replace("/\[td\](.*?)\[\/td\]/sm", '$1', $text); + $text = preg_replace("/\[tr\](.*?)\[\/tr\]/sm", '$1', $text); + $text = preg_replace("/\[table\](.*?)\[\/table\]/sm", '$1
    ', $text); + + $text = preg_replace("/\[table border=1\](.*?)\[\/table\]/sm", '$1
    ', $text); + $text = preg_replace("/\[table border=0\](.*?)\[\/table\]/sm", '$1
    ', $text); + + $text = str_replace('[hr]', '
    ', $text); + + if (!$for_plaintext) { + $escaped = []; + + // Escaping BBCodes susceptible to contain rogue URL we don'' want the autolinker to catch + $text = preg_replace_callback('#\[(url|img|audio|video|youtube|vimeo|share|attachment|iframe|bookmark).+?\[/\1\]#ism', + function ($matches) use (&$escaped) { + $return = '{escaped-' . count($escaped) . '}'; + $escaped[] = $matches[0]; + + return $return; + }, + $text + ); + + // Autolinker for isolated URLs + $text = preg_replace(Strings::autoLinkRegEx(), '[url]$1[/url]', $text); + + // Restoring escaped blocks + $text = preg_replace_callback('/{escaped-([0-9]+)}/iU', + function ($matches) use ($escaped) { + return $escaped[intval($matches[1])] ?? $matches[0]; + }, + $text + ); + } + + // This is actually executed in Item::prepareBody() + + $nosmile = strpos($text, '[nosmile]') !== false; + $text = str_replace('[nosmile]', '', $text); + + // Check for font change text + $text = preg_replace("/\[font=(.*?)\](.*?)\[\/font\]/sm", "$2", $text); + + // Declare the format for [spoiler] layout + $SpoilerLayout = '
    ' . DI::l10n()->t('Click to open/close') . '$1
    '; + + // Check for [spoiler] text + // handle nested quotes + $endlessloop = 0; + while ((strpos($text, "[/spoiler]") !== false) && (strpos($text, "[spoiler]") !== false) && (++$endlessloop < 20)) { + $text = preg_replace("/\[spoiler\](.*?)\[\/spoiler\]/ism", $SpoilerLayout, $text); + } + + // Check for [spoiler=Title] text + + // handle nested quotes + $endlessloop = 0; + while ((strpos($text, "[/spoiler]")!== false) && (strpos($text, "[spoiler=") !== false) && (++$endlessloop < 20)) { + $text = preg_replace("/\[spoiler=[\"\']*(.*?)[\"\']*\](.*?)\[\/spoiler\]/ism", + '
    $1$2
    ', + $text); + } + + // Declare the format for [quote] layout + $QuoteLayout = '
    $1
    '; + + // Check for [quote] text + // handle nested quotes + $endlessloop = 0; + while ((strpos($text, "[/quote]") !== false) && (strpos($text, "[quote]") !== false) && (++$endlessloop < 20)) { + $text = preg_replace("/\[quote\](.*?)\[\/quote\]/ism", "$QuoteLayout", $text); + } + + // Check for [quote=Author] text + + $t_wrote = DI::l10n()->t('$1 wrote:'); + + // handle nested quotes + $endlessloop = 0; + while ((strpos($text, "[/quote]")!== false) && (strpos($text, "[quote=") !== false) && (++$endlessloop < 20)) { + $text = preg_replace("/\[quote=[\"\']*(.*?)[\"\']*\](.*?)\[\/quote\]/ism", + "

    " . $t_wrote . "

    $2
    ", + $text); + } + + + // [img=widthxheight]image source[/img] + $text = preg_replace_callback( + "/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", + function ($matches) use ($simple_html) { + if (strpos($matches[3], "data:image/") === 0) { + return $matches[0]; + } + + $matches[3] = self::proxyUrl($matches[3], $simple_html); + return "[img=" . $matches[1] . "x" . $matches[2] . "]" . $matches[3] . "[/img]"; + }, + $text + ); + + $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '', $text); + $text = preg_replace("/\[zmg\=([0-9]*)x([0-9]*)\](.*?)\[\/zmg\]/ism", '', $text); + + $text = preg_replace_callback("/\[img\=(.*?)\](.*?)\[\/img\]/ism", + function ($matches) use ($simple_html) { + $matches[1] = self::proxyUrl($matches[1], $simple_html); + $matches[2] = htmlspecialchars($matches[2], ENT_COMPAT); + return '' . $matches[2] . ''; + }, + $text); + + // Images + // [img]pathtoimage[/img] + $text = preg_replace_callback( + "/\[img\](.*?)\[\/img\]/ism", + function ($matches) use ($simple_html) { + if (strpos($matches[1], "data:image/") === 0) { + return $matches[0]; + } + + $matches[1] = self::proxyUrl($matches[1], $simple_html); + return "[img]" . $matches[1] . "[/img]"; + }, + $text + ); + + $text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); + $text = preg_replace("/\[zmg\](.*?)\[\/zmg\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); + + $text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); + $text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); + //$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $Text); + + // Simplify "video" element + $text = preg_replace('(\[video.*?\ssrc\s?=\s?([^\s\]]+).*?\].*?\[/video\])ism', '[video]$1[/video]', $text); + + // Try to Oembed + if ($try_oembed) { + $text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism", '', $text); + $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '', $text); + + $text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text); + $text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text); + } else { + $text = preg_replace("/\[video\](.*?)\[\/video\]/ism", + '$1', $text); + $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", + '$1', $text); + } + + // html5 video and audio + + + if ($try_oembed) { + $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '', $text); + } else { + $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '$1', $text); + } + + // Youtube extensions + if ($try_oembed) { + $text = preg_replace_callback("/\[youtube\](https?:\/\/www.youtube.com\/watch\?v\=.*?)\[\/youtube\]/ism", $try_oembed_callback, $text); + $text = preg_replace_callback("/\[youtube\](www.youtube.com\/watch\?v\=.*?)\[\/youtube\]/ism", $try_oembed_callback, $text); + $text = preg_replace_callback("/\[youtube\](https?:\/\/youtu.be\/.*?)\[\/youtube\]/ism", $try_oembed_callback, $text); + } + + $text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $text); + $text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $text); + $text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $text); + + if ($try_oembed) { + $text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '', $text); + } else { + $text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", + 'https://www.youtube.com/watch?v=$1', 
$text); + } + + if ($try_oembed) { + $text = preg_replace_callback("/\[vimeo\](https?:\/\/player.vimeo.com\/video\/[0-9]+).*?\[\/vimeo\]/ism", $try_oembed_callback, $text); + $text = preg_replace_callback("/\[vimeo\](https?:\/\/vimeo.com\/[0-9]+).*?\[\/vimeo\]/ism", $try_oembed_callback, $text); + } + + $text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism", '[vimeo]$1[/vimeo]', $text); + $text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism", '[vimeo]$1[/vimeo]', $text); + + if ($try_oembed) { + $text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '', $text); + } else { + $text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", + 'https://vimeo.com/$1', $text); + } + + // oembed tag + $text = OEmbed::BBCode2HTML($text); + + // Avoid triple linefeeds through oembed + $text = str_replace("


    ", "

    ", $text); + + // If we found an event earlier, strip out all the event code and replace with a reformatted version. + // Replace the event-start section with the entire formatted event. The other bbcode is stripped. + // Summary (e.g. title) is required, earlier revisions only required description (in addition to + // start which is always required). Allow desc with a missing summary for compatibility. + + if ((!empty($ev['desc']) || !empty($ev['summary'])) && !empty($ev['start'])) { + $sub = Event::getHTML($ev, $simple_html); + + $text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism", '', $text); + $text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism", '', $text); + $text = preg_replace("/\[event\-start\](.*?)\[\/event\-start\]/ism", $sub, $text); + $text = preg_replace("/\[event\-finish\](.*?)\[\/event\-finish\]/ism", '', $text); + $text = preg_replace("/\[event\-location\](.*?)\[\/event\-location\]/ism", '', $text); + $text = preg_replace("/\[event\-adjust\](.*?)\[\/event\-adjust\]/ism", '', $text); + $text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism", '', $text); + } + + // Replace non graphical smilies for external posts + if (!$nosmile && !$for_plaintext) { + $text = Smilies::replace($text); + } + + if (!$for_plaintext && DI::config()->get('system', 'big_emojis') && ($simple_html != self::DIASPORA)) { + $conv = html_entity_decode(str_replace([' ', "\n", "\r"], '', $text)); + // Emojis are always 4 byte Unicode characters + if (!empty($conv) && (strlen($conv) / mb_strlen($conv) == 4)) { + $text = '' . $text . 
''; + } + } + + if (!$for_plaintext) { + if (in_array($simple_html, [self::OSTATUS, self::ACTIVITYPUB])) { + $text = preg_replace_callback("/\[url\](.*?)\[\/url\]/ism", 'self::convertUrlForActivityPubCallback', $text); + $text = preg_replace_callback("/\[url\=(.*?)\](.*?)\[\/url\]/ism", 'self::convertUrlForActivityPubCallback', $text); + } + } else { + $text = preg_replace("(\[url\](.*?)\[\/url\])ism", " $1 ", $text); + $text = preg_replace_callback("&\[url=([^\[\]]*)\]\[img\](.*)\[\/img\]\[\/url\]&Usi", 'self::removePictureLinksCallback', $text); + } + + $text = str_replace(["\r","\n"], ['
    ', '
    '], $text); + + // Remove all hashtag addresses + if ($simple_html && !in_array($simple_html, [self::DIASPORA, self::OSTATUS, self::ACTIVITYPUB])) { + $text = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '$1$3', $text); + } elseif ($simple_html == self::DIASPORA) { + // The ! is converted to @ since Diaspora only understands the @ + $text = preg_replace("/([@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", + '@$3', + $text); + } elseif (in_array($simple_html, [self::OSTATUS, self::ACTIVITYPUB])) { + $text = preg_replace("/([@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", + '$1$3', + $text); + } elseif (!$simple_html) { + $text = preg_replace("/([@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", + '$1$3', + $text); + } + + // Bookmarks in red - will be converted to bookmarks in friendica + $text = preg_replace("/#\^\[url\](.*?)\[\/url\]/ism", '[bookmark=$1]$1[/bookmark]', $text); + $text = preg_replace("/#\^\[url\=(.*?)\](.*?)\[\/url\]/ism", '[bookmark=$1]$2[/bookmark]', $text); + $text = preg_replace("/#\[url\=.*?\]\^\[\/url\]\[url\=(.*?)\](.*?)\[\/url\]/i", + "[bookmark=$1]$2[/bookmark]", $text); + + if (in_array($simple_html, [self::API, self::OSTATUS, self::TWITTER])) { + $text = preg_replace_callback("/([^#@!])\[url\=([^\]]*)\](.*?)\[\/url\]/ism", "self::expandLinksCallback", $text); + //$Text = preg_replace("/[^#@!]\[url\=([^\]]*)\](.*?)\[\/url\]/ism", ' $2 [url]$1[/url]', $Text); + $text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", ' $2 [url]$1[/url]',$text); + } + + // Perform URL Search + if ($try_oembed) { + $text = preg_replace_callback("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", $try_oembed_callback, $text); + } + + $text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", '[url=$1]$2[/url]', $text); + + // Handle Diaspora posts + $text = preg_replace_callback( + "&\[url=/?posts/([^\[\]]*)\](.*)\[\/url\]&Usi", + function ($match) { + return "[url=" . DI::baseUrl() . "/display/" . $match[1] . "]" . $match[2] . 
"[/url]"; + }, $text + ); + + $text = preg_replace_callback( + "&\[url=/people\?q\=(.*)\](.*)\[\/url\]&Usi", + function ($match) { + return "[url=" . DI::baseUrl() . "/search?search=%40" . $match[1] . "]" . $match[2] . "[/url]"; + }, $text + ); + + // Server independent link to posts and comments + // See issue: https://github.com/diaspora/diaspora_federation/issues/75 + $expression = "=diaspora://.*?/post/([0-9A-Za-z\-_@.:]{15,254}[0-9A-Za-z])=ism"; + $text = preg_replace($expression, DI::baseUrl()."/display/$1", $text); + + /* Tag conversion + * Supports: + * - #[url=][/url] + * - [url=]#[/url] + */ + $text = preg_replace_callback("/(?:#\[url\=[^\[\]]*\]|\[url\=[^\[\]]*\]#)(.*?)\[\/url\]/ism", function($matches) use ($simple_html) { + if ($simple_html == BBCode::ACTIVITYPUB) { + return '#' + . XML::escape($matches[1]) . ''; + } else { + return '#'; + } + }, $text); + + // We need no target="_blank" rel="noopener noreferrer" for local links + // convert links start with DI::baseUrl() as local link without the target="_blank" rel="noopener noreferrer" attribute + $escapedBaseUrl = preg_quote(DI::baseUrl(), '/'); + $text = preg_replace("/\[url\](".$escapedBaseUrl.".*?)\[\/url\]/ism", '$1', $text); + $text = preg_replace("/\[url\=(".$escapedBaseUrl.".*?)\](.*?)\[\/url\]/ism", '$2', $text); + + $text = preg_replace("/\[url\](.*?)\[\/url\]/ism", '$1', $text); + $text = preg_replace("/\[url\=(.*?)\](.*?)\[\/url\]/ism", '$2', $text); + + // Red compatibility, though the link can't be authenticated on Friendica + $text = preg_replace("/\[zrl\=(.*?)\](.*?)\[\/zrl\]/ism", '$2', $text); + + + // we may need to restrict this further if it picks up too many strays + // link acct:user@host to a webfinger profile redirector + + $text = preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', 'acct:$1@$2', $text); + + // Perform MAIL Search + $text = preg_replace("/\[mail\](.*?)\[\/mail\]/", '$1', $text); + $text = 
preg_replace("/\[mail\=(.*?)\](.*?)\[\/mail\]/", '$2', $text); + + /// @todo What is the meaning of these lines? + $text = preg_replace('/\[\&\;([#a-z0-9]+)\;\]/', '&$1;', $text); + $text = preg_replace('/\&\#039\;/', '\'', $text); + + // Currently deactivated, it made problems with " inside of alt texts. + //$text = preg_replace('/\"\;/', '"', $text); + + // fix any escaped ampersands that may have been converted into links + $text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&\;(.*?)\>/ism', '<$1$2=$3&$4>', $text); + + // sanitizes src attributes (http and redir URLs for displaying in a web page, cid used for inline images in emails) + $allowed_src_protocols = ['//', 'http://', 'https://', 'redir/', 'cid:']; + + array_walk($allowed_src_protocols, function(&$value) { $value = preg_quote($value, '#');}); + + $text = preg_replace('#<([^>]*?)(src)="(?!' . implode('|', $allowed_src_protocols) . ')(.*?)"(.*?)>#ism', + '<$1$2=""$4 data-original-src="$3" class="invalid-src" title="' . DI::l10n()->t('Invalid source protocol') . '">', $text); + + // sanitize href attributes (only allowlisted protocols URLs) + // default value for backward compatibility + $allowed_link_protocols = DI::config()->get('system', 'allowed_link_protocols', []); + + // Always allowed protocol even if config isn't set or not including it + $allowed_link_protocols[] = '//'; + $allowed_link_protocols[] = 'http://'; + $allowed_link_protocols[] = 'https://'; + $allowed_link_protocols[] = 'redir/'; + + array_walk($allowed_link_protocols, function(&$value) { $value = preg_quote($value, '#');}); + + $regex = '#<([^>]*?)(href)="(?!' . implode('|', $allowed_link_protocols) . ')(.*?)"(.*?)>#ism'; + $text = preg_replace($regex, '<$1$2="javascript:void(0)"$4 data-original-href="$3" class="invalid-href" title="' . DI::l10n()->t('Invalid link protocol') . 
'">', $text); + + // Shared content + $text = self::convertShare( + $text, + function (array $attributes, array $author_contact, $content, $is_quote_share) use ($simple_html) { + return self::convertShareCallback($attributes, $author_contact, $content, $is_quote_share, $simple_html); + } + ); + + $text = self::interpolateSavedImagesIntoItemBody($text, $saved_image); + + return $text; + }); // Escaped noparse, nobb, pre + + // Remove escaping tags + $text = preg_replace("/\[noparse\](.*?)\[\/noparse\]/ism", '\1', $text); + $text = preg_replace("/\[nobb\](.*?)\[\/nobb\]/ism", '\1', $text); + $text = preg_replace("/\[pre\](.*?)\[\/pre\]/ism", '\1', $text); + + return $text; + }); // Escaped code + + $text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#ism", + function ($matches) { + if (strpos($matches[2], "\n") !== false) { + $return = '
    ' . htmlspecialchars(trim($matches[2], "\n\r"), ENT_NOQUOTES, 'UTF-8') . '
    '; + } else { + $return = '' . htmlspecialchars($matches[2], ENT_NOQUOTES, 'UTF-8') . ''; + } + return $return; }, $text diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index b35924b33..b69f5abc2 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -167,252 +167,238 @@ class HTML { $message = str_replace("\r", "", $message); - // Removing code blocks before the whitespace removal processing below - $codeblocks = []; + $message = Strings::performWithEscapedBlocks($message, '#
    #iUs', function ($message) { + $message = str_replace( + [ + "
  • ", + "

  • ", + ], + [ + "
  • ", + "
  • ", + ], + $message + ); + + // remove namespaces + $message = preg_replace('=<(\w+):(.+?)>=', '', $message); + $message = preg_replace('==', '', $message); + + $doc = new DOMDocument(); + $doc->preserveWhiteSpace = false; + + $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); + + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); + + XML::deleteNode($doc, 'style'); + XML::deleteNode($doc, 'head'); + XML::deleteNode($doc, 'title'); + XML::deleteNode($doc, 'meta'); + XML::deleteNode($doc, 'xml'); + XML::deleteNode($doc, 'removeme'); + + $xpath = new DomXPath($doc); + $list = $xpath->query("//pre"); + foreach ($list as $node) { + // Ensure to escape unescaped & - they will otherwise raise a warning + $safe_value = preg_replace('/&(?!\w+;)/', '&', $node->nodeValue); + $node->nodeValue = str_replace("\n", "\r", $safe_value); + } + + $message = $doc->saveHTML(); + $message = str_replace(["\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"], ["<", ">", "
    ", " ", ""], $message); + $message = preg_replace('= [\s]*=i', " ", $message); + + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); + + self::tagToBBCode($doc, 'html', [], "", ""); + self::tagToBBCode($doc, 'body', [], "", ""); + + // Outlook-Quote - Variant 1 + self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal', 'style' => 'margin-left:35.4pt'], '[quote]', '[/quote]'); + + // Outlook-Quote - Variant 2 + self::tagToBBCode( + $doc, + 'div', + ['style' => 'border:none;border-left:solid blue 1.5pt;padding:0cm 0cm 0cm 4.0pt'], + '[quote]', + '[/quote]' + ); + + // MyBB-Stuff + self::tagToBBCode($doc, 'span', ['style' => 'text-decoration: underline;'], '[u]', '[/u]'); + self::tagToBBCode($doc, 'span', ['style' => 'font-style: italic;'], '[i]', '[/i]'); + self::tagToBBCode($doc, 'span', ['style' => 'font-weight: bold;'], '[b]', '[/b]'); + + /* self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[font=$1][size=$2][color=$3]', '[/color][/size][/font]'); + self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[size=$1][color=$2]', '[/color][/size]'); + self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(.+)/'), '[font=$1][size=$2]', '[/size][/font]'); + self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'color'=>'/(.+)/'), '[font=$1][color=$3]', '[/color][/font]'); + self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/'), '[font=$1]', '[/font]'); + self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/'), '[size=$1]', '[/size]'); + self::node2BBCode($doc, 'font', array('color'=>'/(.+)/'), '[color=$1]', '[/color]'); + */ + // Untested + //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*font-family:\s*(.+?)[,;].*color:\s*(.+?)[,;].*/'), '[size=$1][font=$2][color=$3]', '[/color][/font][/size]'); + //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(\d+)[,;].*/'), '[size=$1]', '[/size]'); + //self::node2BBCode($doc, 'span', 
array('style'=>'/.*font-size:\s*(.+?)[,;].*/'), '[size=$1]', '[/size]'); + + self::tagToBBCode($doc, 'span', ['style' => '/.*color:\s*(.+?)[,;].*/'], '[color="$1"]', '[/color]'); + + //self::node2BBCode($doc, 'span', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); + //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)pt.*/'), '[font=$1][size=$2]', '[/size][/font]'); + //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)px.*/'), '[font=$1][size=$2]', '[/size][/font]'); + //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); + // Importing the classes - interesting for importing of posts from third party networks that were exported from friendica + // Test + //self::node2BBCode($doc, 'span', array('class'=>'/([\w ]+)/'), '[class=$1]', '[/class]'); + self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]'); + self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]'); + + self::tagToBBCode($doc, 'strong', [], '[b]', '[/b]'); + self::tagToBBCode($doc, 'em', [], '[i]', '[/i]'); + self::tagToBBCode($doc, 'b', [], '[b]', '[/b]'); + self::tagToBBCode($doc, 'i', [], '[i]', '[/i]'); + self::tagToBBCode($doc, 'u', [], '[u]', '[/u]'); + self::tagToBBCode($doc, 's', [], '[s]', '[/s]'); + self::tagToBBCode($doc, 'del', [], '[s]', '[/s]'); + self::tagToBBCode($doc, 'strike', [], '[s]', '[/s]'); + + self::tagToBBCode($doc, 'big', [], "[size=large]", "[/size]"); + self::tagToBBCode($doc, 'small', [], "[size=small]", "[/size]"); + + self::tagToBBCode($doc, 'blockquote', [], '[quote]', '[/quote]'); + + self::tagToBBCode($doc, 'br', [], "\n", ''); + + self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal'], "\n", ""); + self::tagToBBCode($doc, 'div', ['class' => 'MsoNormal'], "\r", ""); + + self::tagToBBCode($doc, 'span', [], "", ""); + + 
self::tagToBBCode($doc, 'span', [], "", ""); + self::tagToBBCode($doc, 'pre', [], "", ""); + + self::tagToBBCode($doc, 'div', [], "\r", "\r"); + self::tagToBBCode($doc, 'p', [], "\n", "\n"); + + self::tagToBBCode($doc, 'ul', [], "[list]", "[/list]"); + self::tagToBBCode($doc, 'ol', [], "[list=1]", "[/list]"); + self::tagToBBCode($doc, 'li', [], "[*]", ""); + + self::tagToBBCode($doc, 'hr', [], "[hr]", ""); + + self::tagToBBCode($doc, 'table', [], "[table]", "[/table]"); + self::tagToBBCode($doc, 'th', [], "[th]", "[/th]"); + self::tagToBBCode($doc, 'tr', [], "[tr]", "[/tr]"); + self::tagToBBCode($doc, 'td', [], "[td]", "[/td]"); + + self::tagToBBCode($doc, 'h1', [], "[h1]", "[/h1]"); + self::tagToBBCode($doc, 'h2', [], "[h2]", "[/h2]"); + self::tagToBBCode($doc, 'h3', [], "[h3]", "[/h3]"); + self::tagToBBCode($doc, 'h4', [], "[h4]", "[/h4]"); + self::tagToBBCode($doc, 'h5', [], "[h5]", "[/h5]"); + self::tagToBBCode($doc, 'h6', [], "[h6]", "[/h6]"); + + self::tagToBBCode($doc, 'a', ['href' => '/mailto:(.+)/'], '[mail=$1]', '[/mail]'); + self::tagToBBCode($doc, 'a', ['href' => '/(.+)/'], '[url=$1]', '[/url]'); + + self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'alt' => '/(.+)/'], '[img=$1]$2', '[/img]', true); + self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'width' => '/(\d+)/', 'height' => '/(\d+)/'], '[img=$2x$3]$1', '[/img]', true); + self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], '[img]$1', '[/img]', true); + + + self::tagToBBCode($doc, 'video', ['src' => '/(.+)/'], '[video]$1', '[/video]', true); + self::tagToBBCode($doc, 'audio', ['src' => '/(.+)/'], '[audio]$1', '[/audio]', true); + self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], '[iframe]$1', '[/iframe]', true); + + self::tagToBBCode($doc, 'key', [], '[code]', '[/code]'); + self::tagToBBCode($doc, 'code', [], '[code]', '[/code]'); + + $message = $doc->saveHTML(); + + // I'm removing something really disturbing + // Don't know exactly what it is + $message = str_replace(chr(194) . 
chr(160), ' ', $message); + + $message = str_replace(" ", " ", $message); + + // removing multiple DIVs + $message = preg_replace('=\r *\r=i', "\n", $message); + $message = str_replace("\r", "\n", $message); + + Hook::callAll('html2bbcode', $message); + + $message = strip_tags($message); + + $message = html_entity_decode($message, ENT_QUOTES, 'UTF-8'); + + // remove quotes if they don't make sense + $message = preg_replace('=\[/quote\][\s]*\[quote\]=i', "\n", $message); + + $message = preg_replace('=\[quote\]\s*=i', "[quote]", $message); + $message = preg_replace('=\s*\[/quote\]=i', "[/quote]", $message); + + do { + $oldmessage = $message; + $message = str_replace("\n \n", "\n\n", $message); + } while ($oldmessage != $message); + + do { + $oldmessage = $message; + $message = str_replace("\n\n\n", "\n\n", $message); + } while ($oldmessage != $message); + + do { + $oldmessage = $message; + $message = str_replace( + [ + "[/size]\n\n", + "\n[hr]", + "[hr]\n", + "\n[list", + "[/list]\n", + "\n[/", + "[list]\n", + "[list=1]\n", + "\n[*]"], + [ + "[/size]\n", + "[hr]", + "[hr]", + "[list", + "[/list]", + "[/", + "[list]", + "[list=1]", + "[*]"], + $message + ); + } while ($message != $oldmessage); + + $message = str_replace( + ['[b][b]', '[/b][/b]', '[i][i]', '[/i][/i]'], + ['[b]', '[/b]', '[i]', '[/i]'], + $message + ); + + // Handling Yahoo style of mails + $message = str_replace('[hr][b]From:[/b]', '[quote][b]From:[/b]', $message); + + return $message; + }); + $message = preg_replace_callback( '#
    (.*)
    #iUs', - function ($matches) use (&$codeblocks) { - $return = '[codeblock-' . count($codeblocks) . ']'; - + function ($matches) { $prefix = '[code]'; if ($matches[1] != '') { $prefix = '[code=' . $matches[1] . ']'; } - $codeblocks[] = $prefix . PHP_EOL . trim($matches[2]) . PHP_EOL . '[/code]'; - return $return; - }, - $message - ); - - $message = str_replace( - [ - "
  • ", - "

  • ", - ], - [ - "
  • ", - "
  • ", - ], - $message - ); - - // remove namespaces - $message = preg_replace('=<(\w+):(.+?)>=', '', $message); - $message = preg_replace('==', '', $message); - - $doc = new DOMDocument(); - $doc->preserveWhiteSpace = false; - - $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); - - @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); - - XML::deleteNode($doc, 'style'); - XML::deleteNode($doc, 'head'); - XML::deleteNode($doc, 'title'); - XML::deleteNode($doc, 'meta'); - XML::deleteNode($doc, 'xml'); - XML::deleteNode($doc, 'removeme'); - - $xpath = new DomXPath($doc); - $list = $xpath->query("//pre"); - foreach ($list as $node) { - // Ensure to escape unescaped & - they will otherwise raise a warning - $safe_value = preg_replace('/&(?!\w+;)/', '&', $node->nodeValue); - $node->nodeValue = str_replace("\n", "\r", $safe_value); - } - - $message = $doc->saveHTML(); - $message = str_replace(["\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"], ["<", ">", "
    ", " ", ""], $message); - $message = preg_replace('= [\s]*=i', " ", $message); - - @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); - - self::tagToBBCode($doc, 'html', [], "", ""); - self::tagToBBCode($doc, 'body', [], "", ""); - - // Outlook-Quote - Variant 1 - self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal', 'style' => 'margin-left:35.4pt'], '[quote]', '[/quote]'); - - // Outlook-Quote - Variant 2 - self::tagToBBCode( - $doc, - 'div', - ['style' => 'border:none;border-left:solid blue 1.5pt;padding:0cm 0cm 0cm 4.0pt'], - '[quote]', - '[/quote]' - ); - - // MyBB-Stuff - self::tagToBBCode($doc, 'span', ['style' => 'text-decoration: underline;'], '[u]', '[/u]'); - self::tagToBBCode($doc, 'span', ['style' => 'font-style: italic;'], '[i]', '[/i]'); - self::tagToBBCode($doc, 'span', ['style' => 'font-weight: bold;'], '[b]', '[/b]'); - - /* self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[font=$1][size=$2][color=$3]', '[/color][/size][/font]'); - self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[size=$1][color=$2]', '[/color][/size]'); - self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(.+)/'), '[font=$1][size=$2]', '[/size][/font]'); - self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'color'=>'/(.+)/'), '[font=$1][color=$3]', '[/color][/font]'); - self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/'), '[font=$1]', '[/font]'); - self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/'), '[size=$1]', '[/size]'); - self::node2BBCode($doc, 'font', array('color'=>'/(.+)/'), '[color=$1]', '[/color]'); - */ - // Untested - //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*font-family:\s*(.+?)[,;].*color:\s*(.+?)[,;].*/'), '[size=$1][font=$2][color=$3]', '[/color][/font][/size]'); - //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(\d+)[,;].*/'), '[size=$1]', '[/size]'); - //self::node2BBCode($doc, 'span', 
array('style'=>'/.*font-size:\s*(.+?)[,;].*/'), '[size=$1]', '[/size]'); - - self::tagToBBCode($doc, 'span', ['style' => '/.*color:\s*(.+?)[,;].*/'], '[color="$1"]', '[/color]'); - - //self::node2BBCode($doc, 'span', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); - //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)pt.*/'), '[font=$1][size=$2]', '[/size][/font]'); - //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)px.*/'), '[font=$1][size=$2]', '[/size][/font]'); - //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); - // Importing the classes - interesting for importing of posts from third party networks that were exported from friendica - // Test - //self::node2BBCode($doc, 'span', array('class'=>'/([\w ]+)/'), '[class=$1]', '[/class]'); - self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]'); - self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]'); - - self::tagToBBCode($doc, 'strong', [], '[b]', '[/b]'); - self::tagToBBCode($doc, 'em', [], '[i]', '[/i]'); - self::tagToBBCode($doc, 'b', [], '[b]', '[/b]'); - self::tagToBBCode($doc, 'i', [], '[i]', '[/i]'); - self::tagToBBCode($doc, 'u', [], '[u]', '[/u]'); - self::tagToBBCode($doc, 's', [], '[s]', '[/s]'); - self::tagToBBCode($doc, 'del', [], '[s]', '[/s]'); - self::tagToBBCode($doc, 'strike', [], '[s]', '[/s]'); - - self::tagToBBCode($doc, 'big', [], "[size=large]", "[/size]"); - self::tagToBBCode($doc, 'small', [], "[size=small]", "[/size]"); - - self::tagToBBCode($doc, 'blockquote', [], '[quote]', '[/quote]'); - - self::tagToBBCode($doc, 'br', [], "\n", ''); - - self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal'], "\n", ""); - self::tagToBBCode($doc, 'div', ['class' => 'MsoNormal'], "\r", ""); - - self::tagToBBCode($doc, 'span', [], "", ""); - - 
self::tagToBBCode($doc, 'span', [], "", ""); - self::tagToBBCode($doc, 'pre', [], "", ""); - - self::tagToBBCode($doc, 'div', [], "\r", "\r"); - self::tagToBBCode($doc, 'p', [], "\n", "\n"); - - self::tagToBBCode($doc, 'ul', [], "[list]", "[/list]"); - self::tagToBBCode($doc, 'ol', [], "[list=1]", "[/list]"); - self::tagToBBCode($doc, 'li', [], "[*]", ""); - - self::tagToBBCode($doc, 'hr', [], "[hr]", ""); - - self::tagToBBCode($doc, 'table', [], "[table]", "[/table]"); - self::tagToBBCode($doc, 'th', [], "[th]", "[/th]"); - self::tagToBBCode($doc, 'tr', [], "[tr]", "[/tr]"); - self::tagToBBCode($doc, 'td', [], "[td]", "[/td]"); - - self::tagToBBCode($doc, 'h1', [], "[h1]", "[/h1]"); - self::tagToBBCode($doc, 'h2', [], "[h2]", "[/h2]"); - self::tagToBBCode($doc, 'h3', [], "[h3]", "[/h3]"); - self::tagToBBCode($doc, 'h4', [], "[h4]", "[/h4]"); - self::tagToBBCode($doc, 'h5', [], "[h5]", "[/h5]"); - self::tagToBBCode($doc, 'h6', [], "[h6]", "[/h6]"); - - self::tagToBBCode($doc, 'a', ['href' => '/mailto:(.+)/'], '[mail=$1]', '[/mail]'); - self::tagToBBCode($doc, 'a', ['href' => '/(.+)/'], '[url=$1]', '[/url]'); - - self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'alt' => '/(.+)/'], '[img=$1]$2', '[/img]', true); - self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'width' => '/(\d+)/', 'height' => '/(\d+)/'], '[img=$2x$3]$1', '[/img]', true); - self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], '[img]$1', '[/img]', true); - - - self::tagToBBCode($doc, 'video', ['src' => '/(.+)/'], '[video]$1', '[/video]', true); - self::tagToBBCode($doc, 'audio', ['src' => '/(.+)/'], '[audio]$1', '[/audio]', true); - self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], '[iframe]$1', '[/iframe]', true); - - self::tagToBBCode($doc, 'key', [], '[code]', '[/code]'); - self::tagToBBCode($doc, 'code', [], '[code]', '[/code]'); - - $message = $doc->saveHTML(); - - // I'm removing something really disturbing - // Don't know exactly what it is - $message = str_replace(chr(194) . 
chr(160), ' ', $message); - - $message = str_replace(" ", " ", $message); - - // removing multiple DIVs - $message = preg_replace('=\r *\r=i', "\n", $message); - $message = str_replace("\r", "\n", $message); - - Hook::callAll('html2bbcode', $message); - - $message = strip_tags($message); - - $message = html_entity_decode($message, ENT_QUOTES, 'UTF-8'); - - // remove quotes if they don't make sense - $message = preg_replace('=\[/quote\][\s]*\[quote\]=i', "\n", $message); - - $message = preg_replace('=\[quote\]\s*=i', "[quote]", $message); - $message = preg_replace('=\s*\[/quote\]=i', "[/quote]", $message); - - do { - $oldmessage = $message; - $message = str_replace("\n \n", "\n\n", $message); - } while ($oldmessage != $message); - - do { - $oldmessage = $message; - $message = str_replace("\n\n\n", "\n\n", $message); - } while ($oldmessage != $message); - - do { - $oldmessage = $message; - $message = str_replace( - [ - "[/size]\n\n", - "\n[hr]", - "[hr]\n", - "\n[list", - "[/list]\n", - "\n[/", - "[list]\n", - "[list=1]\n", - "\n[*]"], - [ - "[/size]\n", - "[hr]", - "[hr]", - "[list", - "[/list]", - "[/", - "[list]", - "[list=1]", - "[*]"], - $message - ); - } while ($message != $oldmessage); - - $message = str_replace( - ['[b][b]', '[/b][/b]', '[i][i]', '[/i][/i]'], - ['[b]', '[/b]', '[i]', '[/i]'], - $message - ); - - // Handling Yahoo style of mails - $message = str_replace('[hr][b]From:[/b]', '[quote][b]From:[/b]', $message); - - // Restore code blocks - $message = preg_replace_callback( - '#\[codeblock-([0-9]+)\]#iU', - function ($matches) use ($codeblocks) { - $return = ''; - if (isset($codeblocks[intval($matches[1])])) { - $return = $codeblocks[$matches[1]]; - } - return $return; + return $prefix . PHP_EOL . trim($matches[2]) . PHP_EOL . '[/code]'; }, $message );