kopia lustrzana https://gitlab.com/jaywink/federation
				
				
				
			Merge branch 'inbound-ap-html' into 'master'
Stop markdownifying received ActivityPub content. Closes socialhome/socialhome#198 and socialhome/socialhome#222. See merge request jaywink/federation!160 merge-requests/161/merge
						commit
						b4cc7071f4
					
				|  | @ -32,6 +32,10 @@ | ||||||
|    |    | ||||||
| * Don't include OStatus for Mastodon 3.0+ protocols list. ([related issue](https://github.com/thefederationinfo/the-federation.info/issues/217)) | * Don't include OStatus for Mastodon 3.0+ protocols list. ([related issue](https://github.com/thefederationinfo/the-federation.info/issues/217)) | ||||||
| 
 | 
 | ||||||
|  | * **Backwards incompatible**: Stop markdownifying incoming ActivityPub content. Instead, | ||||||
|  |   copy it as-is to the ``raw_content`` attribute on the entity and also set the | ||||||
|  |   ``_media_type`` to ``text/html``. | ||||||
|  | 
 | ||||||
| ### Fixed | ### Fixed | ||||||
| 
 | 
 | ||||||
| * Don't crash loudly when fetching webfinger for Diaspora that does not contain XML. | * Don't crash loudly when fetching webfinger for Diaspora that does not contain XML. | ||||||
|  | @ -59,6 +63,10 @@ | ||||||
| 
 | 
 | ||||||
| * Don't try to relay AP payloads to Diaspora receivers and vice versa, for now, until cross-protocol | * Don't try to relay AP payloads to Diaspora receivers and vice versa, for now, until cross-protocol | ||||||
|   relaying is supported. |   relaying is supported. | ||||||
|  |    | ||||||
|  | * Fix some characters preventing tags from being identified ([related issue](https://git.feneas.org/socialhome/socialhome/-/issues/222)) | ||||||
|  | 
 | ||||||
|  | * Fix tags separated by slashes not being identified ([related issue](https://git.feneas.org/socialhome/socialhome/-/issues/198)) | ||||||
| 
 | 
 | ||||||
| ## [0.19.0] - 2019-12-15 | ## [0.19.0] - 2019-12-15 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -3,6 +3,8 @@ import re | ||||||
| import uuid | import uuid | ||||||
| from typing import Dict, List | from typing import Dict, List | ||||||
| 
 | 
 | ||||||
|  | import bleach | ||||||
|  | 
 | ||||||
| from federation.entities.activitypub.constants import ( | from federation.entities.activitypub.constants import ( | ||||||
|     CONTEXTS_DEFAULT, CONTEXT_MANUALLY_APPROVES_FOLLOWERS, CONTEXT_SENSITIVE, CONTEXT_HASHTAG, |     CONTEXTS_DEFAULT, CONTEXT_MANUALLY_APPROVES_FOLLOWERS, CONTEXT_SENSITIVE, CONTEXT_HASHTAG, | ||||||
|     CONTEXT_LD_SIGNATURES) |     CONTEXT_LD_SIGNATURES) | ||||||
|  | @ -57,15 +59,19 @@ class CleanContentMixin(RawContentMixin): | ||||||
|         """ |         """ | ||||||
|         Make linkified tags normal tags. |         Make linkified tags normal tags. | ||||||
|         """ |         """ | ||||||
|         def cleaner(match): |  | ||||||
|             return f"#{match.groups()[0]}" |  | ||||||
| 
 |  | ||||||
|         super().post_receive() |         super().post_receive() | ||||||
|         self.raw_content = re.sub( | 
 | ||||||
|             r'\[#([\w\-_]+)\]\(http?s://[a-zA-Z0-9/._-]+\)', |         def remove_tag_links(attrs, new=False): | ||||||
|             cleaner, |             rel = (None, "rel") | ||||||
|  |             if attrs.get(rel) == "tag": | ||||||
|  |                 return | ||||||
|  |             return attrs | ||||||
|  | 
 | ||||||
|  |         self.raw_content = bleach.linkify( | ||||||
|             self.raw_content, |             self.raw_content, | ||||||
|             re.MULTILINE, |             callbacks=[remove_tag_links], | ||||||
|  |             parse_email=False, | ||||||
|  |             skip_tags=["code", "pre"], | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,8 +1,6 @@ | ||||||
| import logging | import logging | ||||||
| from typing import List, Callable, Dict, Union, Optional | from typing import List, Callable, Dict, Union, Optional | ||||||
| 
 | 
 | ||||||
| from markdownify import markdownify |  | ||||||
| 
 |  | ||||||
| from federation.entities.activitypub.constants import NAMESPACE_PUBLIC | from federation.entities.activitypub.constants import NAMESPACE_PUBLIC | ||||||
| from federation.entities.activitypub.entities import ( | from federation.entities.activitypub.entities import ( | ||||||
|     ActivitypubFollow, ActivitypubProfile, ActivitypubAccept, ActivitypubPost, ActivitypubComment, |     ActivitypubFollow, ActivitypubProfile, ActivitypubAccept, ActivitypubPost, ActivitypubComment, | ||||||
|  | @ -259,19 +257,16 @@ def transform_attribute( | ||||||
|     elif key == "attributedTo" and is_object: |     elif key == "attributedTo" and is_object: | ||||||
|         transformed["actor_id"] = value |         transformed["actor_id"] = value | ||||||
|     elif key in ("content", "source"): |     elif key in ("content", "source"): | ||||||
|         if payload.get('source') and isinstance(payload.get("source"), dict): |         if payload.get('source') and isinstance(payload.get("source"), dict) and \ | ||||||
|  |                 payload.get('source').get('mediaType') == "text/markdown": | ||||||
|  |             transformed["_media_type"] = "text/markdown" | ||||||
|  |             transformed["raw_content"] = payload.get('source').get('content').strip() | ||||||
|             transformed["_rendered_content"] = payload.get('content').strip() |             transformed["_rendered_content"] = payload.get('content').strip() | ||||||
|             if payload.get('source').get('mediaType') == "text/markdown": |  | ||||||
|                 transformed["_media_type"] = "text/markdown" |  | ||||||
|                 transformed["raw_content"] = payload.get('source').get('content').strip() |  | ||||||
|             else: |  | ||||||
|                 transformed["raw_content"] = markdownify(payload.get('content').strip()) |  | ||||||
|                 transformed["_media_type"] = payload.get('source').get('mediaType') |  | ||||||
|         else: |         else: | ||||||
|             transformed["raw_content"] = markdownify(payload.get('content').strip()).strip() |  | ||||||
|             # Assume HTML by convention |             # Assume HTML by convention | ||||||
|             transformed["_rendered_content"] = payload.get('content').strip() |  | ||||||
|             transformed["_media_type"] = "text/html" |             transformed["_media_type"] = "text/html" | ||||||
|  |             transformed["raw_content"] = payload.get('content').strip() | ||||||
|  |             transformed["_rendered_content"] = transformed["raw_content"] | ||||||
|     elif key == "endpoints" and isinstance(value, dict): |     elif key == "endpoints" and isinstance(value, dict): | ||||||
|         if "inboxes" not in transformed: |         if "inboxes" not in transformed: | ||||||
|             transformed["inboxes"] = {"private": None, "public": None} |             transformed["inboxes"] = {"private": None, "public": None} | ||||||
|  |  | ||||||
|  | @ -409,10 +409,6 @@ class TestEntitiesPostReceive: | ||||||
|             "public": False, |             "public": False, | ||||||
|         }] |         }] | ||||||
| 
 | 
 | ||||||
|     def test_post__post_receive__cleans_linkified_tags(self, activitypubpost_linkified_tags): |  | ||||||
|         activitypubpost_linkified_tags.post_receive() |  | ||||||
|         assert activitypubpost_linkified_tags.raw_content == '<p>👁️foobar</p><p>barfoo!<br>#fanart #mastoart</p>' |  | ||||||
| 
 |  | ||||||
| 
 | 
 | ||||||
| class TestEntitiesPreSend: | class TestEntitiesPreSend: | ||||||
|     def test_post_inline_images_are_attached(self, activitypubpost_embedded_images): |     def test_post_inline_images_are_attached(self, activitypubpost_embedded_images): | ||||||
|  |  | ||||||
|  | @ -67,7 +67,9 @@ class TestActivitypubEntityMappersReceive: | ||||||
|         post = entities[0] |         post = entities[0] | ||||||
|         assert isinstance(post, ActivitypubPost) |         assert isinstance(post, ActivitypubPost) | ||||||
|         assert isinstance(post, Post) |         assert isinstance(post, Post) | ||||||
|         assert post.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom' |         assert post.raw_content == '<p><span class="h-card"><a class="u-url mention" ' \ | ||||||
|  |                                    'href="https://dev.jasonrobinson.me/u/jaywink/">' \ | ||||||
|  |                                    '@<span>jaywink</span></a></span> boom</p>' | ||||||
|         assert post.rendered_content == '<p><span class="h-card"><a href="https://dev.jasonrobinson.me/u/jaywink/" ' \ |         assert post.rendered_content == '<p><span class="h-card"><a href="https://dev.jasonrobinson.me/u/jaywink/" ' \ | ||||||
|                                         'class="u-url mention">@<span>jaywink</span></a></span> boom</p>' |                                         'class="u-url mention">@<span>jaywink</span></a></span> boom</p>' | ||||||
|         assert post.id == "https://diaspodon.fr/users/jaywink/statuses/102356911717767237" |         assert post.id == "https://diaspodon.fr/users/jaywink/statuses/102356911717767237" | ||||||
|  | @ -82,7 +84,7 @@ class TestActivitypubEntityMappersReceive: | ||||||
|         post = entities[0] |         post = entities[0] | ||||||
|         assert isinstance(post, ActivitypubPost) |         assert isinstance(post, ActivitypubPost) | ||||||
|         assert isinstance(post, Post) |         assert isinstance(post, Post) | ||||||
|         assert post.raw_content == 'boom #test' |         assert post.raw_content == '<p>boom #test</p>' | ||||||
| 
 | 
 | ||||||
|     def test_message_to_objects_simple_post__with_mentions(self): |     def test_message_to_objects_simple_post__with_mentions(self): | ||||||
|         entities = message_to_objects(ACTIVITYPUB_POST_WITH_MENTIONS, "https://mastodon.social/users/jaywink") |         entities = message_to_objects(ACTIVITYPUB_POST_WITH_MENTIONS, "https://mastodon.social/users/jaywink") | ||||||
|  | @ -101,7 +103,9 @@ class TestActivitypubEntityMappersReceive: | ||||||
|         assert isinstance(post, Post) |         assert isinstance(post, Post) | ||||||
|         assert post.rendered_content == '<p><span class="h-card"><a href="https://dev.jasonrobinson.me/u/jaywink/" ' \ |         assert post.rendered_content == '<p><span class="h-card"><a href="https://dev.jasonrobinson.me/u/jaywink/" ' \ | ||||||
|                                         'class="u-url mention">@<span>jaywink</span></a></span> boom</p>' |                                         'class="u-url mention">@<span>jaywink</span></a></span> boom</p>' | ||||||
|         assert post.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom\n\n' |         assert post.raw_content == '<p><span class="h-card"><a class="u-url mention" ' \ | ||||||
|  |                                    'href="https://dev.jasonrobinson.me/u/jaywink/">' \ | ||||||
|  |                                    '@<span>jaywink</span></a></span> boom</p>' | ||||||
| 
 | 
 | ||||||
|     def test_message_to_objects_simple_post__with_source__markdown(self): |     def test_message_to_objects_simple_post__with_source__markdown(self): | ||||||
|         entities = message_to_objects(ACTIVITYPUB_POST_WITH_SOURCE_MARKDOWN, "https://diaspodon.fr/users/jaywink") |         entities = message_to_objects(ACTIVITYPUB_POST_WITH_SOURCE_MARKDOWN, "https://diaspodon.fr/users/jaywink") | ||||||
|  | @ -141,7 +145,9 @@ class TestActivitypubEntityMappersReceive: | ||||||
|         comment = entities[0] |         comment = entities[0] | ||||||
|         assert isinstance(comment, ActivitypubComment) |         assert isinstance(comment, ActivitypubComment) | ||||||
|         assert isinstance(comment, Comment) |         assert isinstance(comment, Comment) | ||||||
|         assert comment.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom' |         assert comment.raw_content == '<p><span class="h-card"><a class="u-url mention" ' \ | ||||||
|  |                                       'href="https://dev.jasonrobinson.me/u/jaywink/">' \ | ||||||
|  |                                       '@<span>jaywink</span></a></span> boom</p>' | ||||||
|         assert comment.id == "https://diaspodon.fr/users/jaywink/statuses/102356911717767237" |         assert comment.id == "https://diaspodon.fr/users/jaywink/statuses/102356911717767237" | ||||||
|         assert comment.actor_id == "https://diaspodon.fr/users/jaywink" |         assert comment.actor_id == "https://diaspodon.fr/users/jaywink" | ||||||
|         assert comment.target_id == "https://dev.jasonrobinson.me/content/653bad70-41b3-42c9-89cb-c4ee587e68e4/" |         assert comment.target_id == "https://dev.jasonrobinson.me/content/653bad70-41b3-42c9-89cb-c4ee587e68e4/" | ||||||
|  |  | ||||||
|  | @ -4,7 +4,7 @@ from freezegun import freeze_time | ||||||
| from federation.entities.activitypub.entities import ( | from federation.entities.activitypub.entities import ( | ||||||
|     ActivitypubPost, ActivitypubAccept, ActivitypubFollow, ActivitypubProfile, ActivitypubComment, |     ActivitypubPost, ActivitypubAccept, ActivitypubFollow, ActivitypubProfile, ActivitypubComment, | ||||||
|     ActivitypubRetraction, ActivitypubShare, ActivitypubImage) |     ActivitypubRetraction, ActivitypubShare, ActivitypubImage) | ||||||
| from federation.entities.base import Profile, Image | from federation.entities.base import Profile | ||||||
| from federation.entities.diaspora.entities import ( | from federation.entities.diaspora.entities import ( | ||||||
|     DiasporaPost, DiasporaComment, DiasporaLike, DiasporaProfile, DiasporaRetraction, |     DiasporaPost, DiasporaComment, DiasporaLike, DiasporaProfile, DiasporaRetraction, | ||||||
|     DiasporaContact, DiasporaReshare, |     DiasporaContact, DiasporaReshare, | ||||||
|  | @ -144,20 +144,6 @@ https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902 | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @pytest.fixture |  | ||||||
| def activitypubpost_linkified_tags(): |  | ||||||
|     with freeze_time("2019-04-27"): |  | ||||||
|         return ActivitypubPost( |  | ||||||
|             raw_content='<p>👁️foobar</p><p>barfoo!<br>[#fanart](https://mastodon.art/tags/fanart) ' |  | ||||||
|                         '[#mastoart](https://mastodon.art/tags/mastoart)</p>', |  | ||||||
|             public=True, |  | ||||||
|             provider_display_name="Mastodon", |  | ||||||
|             id=f"http://127.0.0.1:8000/post/123456/", |  | ||||||
|             activity_id=f"http://127.0.0.1:8000/post/123456/#create", |  | ||||||
|             actor_id=f"http://127.0.0.1:8000/profile/123456/", |  | ||||||
|         ) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| @pytest.fixture | @pytest.fixture | ||||||
| def activitypubprofile(): | def activitypubprofile(): | ||||||
|     return ActivitypubProfile( |     return ActivitypubProfile( | ||||||
|  |  | ||||||
|  | @ -34,12 +34,13 @@ class TestFindTags: | ||||||
|         assert text == "foo\n```\n#code\n```\n#notcode/notcode\n\n    #alsocode\n" |         assert text == "foo\n```\n#code\n```\n#notcode/notcode\n\n    #alsocode\n" | ||||||
| 
 | 
 | ||||||
|     def test_endings_are_filtered_out(self): |     def test_endings_are_filtered_out(self): | ||||||
|         source = "#parenthesis) #exp! #list]" |         source = "#parenthesis) #exp! #list] *#doh* _#bah_ #gah% #foo/#bar" | ||||||
|         tags, text = find_tags(source) |         tags, text = find_tags(source) | ||||||
|         assert tags == {"parenthesis", "exp", "list"} |         assert tags == {"parenthesis", "exp", "list", "doh", "bah", "gah", "foo", "bar"} | ||||||
|         assert text == source |         assert text == source | ||||||
|         tags, text = find_tags(source, replacer=self._replacer) |         tags, text = find_tags(source, replacer=self._replacer) | ||||||
|         assert text == "#parenthesis/parenthesis) #exp/exp! #list/list]" |         assert text == "#parenthesis/parenthesis) #exp/exp! #list/list] *#doh/doh* _#bah/bah_ #gah/gah% " \ | ||||||
|  |                        "#foo/foo/#bar/bar" | ||||||
| 
 | 
 | ||||||
|     def test_finds_tags(self): |     def test_finds_tags(self): | ||||||
|         source = "#post **Foobar** #tag #OtherTag #third\n#fourth" |         source = "#post **Foobar** #tag #OtherTag #third\n#fourth" | ||||||
|  | @ -49,6 +50,14 @@ class TestFindTags: | ||||||
|         tags, text = find_tags(source, replacer=self._replacer) |         tags, text = find_tags(source, replacer=self._replacer) | ||||||
|         assert text == "#post/post **Foobar** #tag/tag #OtherTag/othertag #third/third\n#fourth/fourth" |         assert text == "#post/post **Foobar** #tag/tag #OtherTag/othertag #third/third\n#fourth/fourth" | ||||||
| 
 | 
 | ||||||
|  |     def test_ok_with_html_tags_in_text(self): | ||||||
|  |         source = "<p>#starting and <span>#MixED</span> however not <#>this</#> or <#/>that" | ||||||
|  |         tags, text = find_tags(source) | ||||||
|  |         assert tags == {"starting", "mixed"} | ||||||
|  |         assert text == source | ||||||
|  |         tags, text = find_tags(source, replacer=self._replacer) | ||||||
|  |         assert text == "<p>#starting/starting and <span>#MixED/mixed</span> however not <#>this</#> or <#/>that" | ||||||
|  | 
 | ||||||
|     def test_postfixed_tags(self): |     def test_postfixed_tags(self): | ||||||
|         source = "#foo) #bar] #hoo, #hee." |         source = "#foo) #bar] #hoo, #hee." | ||||||
|         tags, text = find_tags(source) |         tags, text = find_tags(source) | ||||||
|  | @ -66,7 +75,7 @@ class TestFindTags: | ||||||
|         assert text == "(#foo/foo [#bar/bar" |         assert text == "(#foo/foo [#bar/bar" | ||||||
| 
 | 
 | ||||||
|     def test_invalid_text_returns_no_tags(self): |     def test_invalid_text_returns_no_tags(self): | ||||||
|         source = "#a!a #a#a #a$a #a%a #a^a #a&a #a*a #a+a #a.a #a,a #a@a #a£a #a/a #a(a #a)a #a=a " \ |         source = "#a!a #a#a #a$a #a%a #a^a #a&a #a*a #a+a #a.a #a,a #a@a #a£a #a(a #a)a #a=a " \ | ||||||
|                  "#a?a #a`a #a'a #a\\a #a{a #a[a #a]a #a}a #a~a #a;a #a:a #a\"a #a’a #a”a #\xa0cd" |                  "#a?a #a`a #a'a #a\\a #a{a #a[a #a]a #a}a #a~a #a;a #a:a #a\"a #a’a #a”a #\xa0cd" | ||||||
|         tags, text = find_tags(source) |         tags, text = find_tags(source) | ||||||
|         assert tags == set() |         assert tags == set() | ||||||
|  | @ -74,6 +83,14 @@ class TestFindTags: | ||||||
|         tags, text = find_tags(source, replacer=self._replacer) |         tags, text = find_tags(source, replacer=self._replacer) | ||||||
|         assert text == source |         assert text == source | ||||||
| 
 | 
 | ||||||
|  |     def test_start_of_paragraph_in_html_content(self): | ||||||
|  |         source = '<p>First line</p><p>#foobar #barfoo</p>' | ||||||
|  |         tags, text = find_tags(source) | ||||||
|  |         assert tags == {"foobar", "barfoo"} | ||||||
|  |         assert text == source | ||||||
|  |         tags, text = find_tags(source, replacer=self._replacer) | ||||||
|  |         assert text == '<p>First line</p><p>#foobar/foobar #barfoo/barfoo</p>' | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class TestProcessTextLinks: | class TestProcessTextLinks: | ||||||
|     def test_link_at_start_or_end(self): |     def test_link_at_start_or_end(self): | ||||||
|  | @ -97,6 +114,12 @@ class TestProcessTextLinks: | ||||||
|         assert process_text_links('<a href="/streams/tag/foobar">#foobar</a>') == \ |         assert process_text_links('<a href="/streams/tag/foobar">#foobar</a>') == \ | ||||||
|                '<a href="/streams/tag/foobar">#foobar</a>' |                '<a href="/streams/tag/foobar">#foobar</a>' | ||||||
| 
 | 
 | ||||||
|  |     def test_does_not_remove_mention_classes(self): | ||||||
|  |         assert process_text_links('<p><span class="h-card"><a href="https://dev.jasonrobinson.me/u/jaywink/" ' | ||||||
|  |                                   'class="u-url mention">@<span>jaywink</span></a></span> boom</p>') == \ | ||||||
|  |            '<p><span class="h-card"><a class="u-url mention" href="https://dev.jasonrobinson.me/u/jaywink/" ' \ | ||||||
|  |            'rel="nofollow" target="_blank">@<span>jaywink</span></a></span> boom</p>' | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| def test_validate_handle(): | def test_validate_handle(): | ||||||
|     assert validate_handle("foo@bar.com") |     assert validate_handle("foo@bar.com") | ||||||
|  |  | ||||||
|  | @ -33,7 +33,9 @@ def find_tags(text: str, replacer: callable = None) -> Tuple[Set, str]: | ||||||
|     Returns a set of tags and the original or replaced text. |     Returns a set of tags and the original or replaced text. | ||||||
|     """ |     """ | ||||||
|     found_tags = set() |     found_tags = set() | ||||||
|     lines = text.splitlines(keepends=True) |     # <br> and <p> tags cause issues in us finding words - add some spacing around them | ||||||
|  |     new_text = text.replace("<br>", " <br> ").replace("<p>", " <p> ").replace("</p>", " </p> ") | ||||||
|  |     lines = new_text.splitlines(keepends=True) | ||||||
|     final_lines = [] |     final_lines = [] | ||||||
|     code_block = False |     code_block = False | ||||||
|     final_text = None |     final_text = None | ||||||
|  | @ -49,17 +51,28 @@ def find_tags(text: str, replacer: callable = None) -> Tuple[Set, str]: | ||||||
|         # Check each word separately |         # Check each word separately | ||||||
|         words = line.split(" ") |         words = line.split(" ") | ||||||
|         for word in words: |         for word in words: | ||||||
|             candidate = word.strip().strip("([]),.!?:") |             if word.find('#') > -1: | ||||||
|             if candidate.startswith("#"): |                 candidate = word.strip().strip("([]),.!?:*_%/") | ||||||
|                 candidate = candidate.strip("#") |                 if candidate.find('<') > -1 or candidate.find('>') > -1: | ||||||
|                 if test_tag(candidate.lower()): |                     # Strip html | ||||||
|                     found_tags.add(candidate.lower()) |                     candidate = bleach.clean(word, strip=True) | ||||||
|                     if replacer: |                 # Now split with slashes | ||||||
|                         try: |                 candidates = candidate.split("/") | ||||||
|                             tag_word = word.replace("#%s" % candidate, replacer(candidate)) |                 to_replace = [] | ||||||
|                             final_words.append(tag_word) |                 for candidate in candidates: | ||||||
|                         except Exception: |                     if candidate.startswith("#"): | ||||||
|                             final_words.append(word) |                         candidate = candidate.strip("#") | ||||||
|  |                         if test_tag(candidate.lower()): | ||||||
|  |                             found_tags.add(candidate.lower()) | ||||||
|  |                             to_replace.append(candidate) | ||||||
|  |                 if replacer: | ||||||
|  |                     tag_word = word | ||||||
|  |                     try: | ||||||
|  |                         for counter, replacee in enumerate(to_replace, 1): | ||||||
|  |                             tag_word = tag_word.replace("#%s" % replacee, replacer(replacee)) | ||||||
|  |                     except Exception: | ||||||
|  |                         pass | ||||||
|  |                     final_words.append(tag_word) | ||||||
|                 else: |                 else: | ||||||
|                     final_words.append(word) |                     final_words.append(word) | ||||||
|             else: |             else: | ||||||
|  | @ -67,6 +80,8 @@ def find_tags(text: str, replacer: callable = None) -> Tuple[Set, str]: | ||||||
|         final_lines.append(" ".join(final_words)) |         final_lines.append(" ".join(final_words)) | ||||||
|     if replacer: |     if replacer: | ||||||
|         final_text = "".join(final_lines) |         final_text = "".join(final_lines) | ||||||
|  |     if final_text: | ||||||
|  |         final_text = final_text.replace(" <br> ", "<br>").replace(" <p> ", "<p>").replace(" </p> ", "</p>") | ||||||
|     return found_tags, final_text or text |     return found_tags, final_text or text | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										1
									
								
								setup.py
								
								
								
								
							
							
						
						
									
										1
									
								
								setup.py
								
								
								
								
							|  | @ -36,7 +36,6 @@ setup( | ||||||
|         "lxml>=3.4.0", |         "lxml>=3.4.0", | ||||||
|         "ipdata>=3.0", |         "ipdata>=3.0", | ||||||
|         "iteration_utilities", |         "iteration_utilities", | ||||||
|         "markdownify", |  | ||||||
|         "jsonschema>=2.0.0", |         "jsonschema>=2.0.0", | ||||||
|         "pycryptodome>=3.4.10", |         "pycryptodome>=3.4.10", | ||||||
|         "python-dateutil>=2.4.0", |         "python-dateutil>=2.4.0", | ||||||
|  |  | ||||||
		Ładowanie…
	
		Reference in New Issue
	
	 jaywink
						jaywink