kopia lustrzana https://gitlab.com/jaywink/federation
Allow '-' in tags. Make AP tag discovery more robust.
rodzic
54a8404c3d
commit
6fd445382d
|
@ -856,7 +856,7 @@ class Note(Object, RawContentMixin):
|
|||
parsed = urlparse(unquote(link['href']).lower())
|
||||
# remove the query part and trailing garbage, if any
|
||||
path = parsed.path
|
||||
trunc = re.match(r'(/[\w/]+)', parsed.path)
|
||||
trunc = re.match(r'(/[\w/\-]+)', parsed.path)
|
||||
if trunc:
|
||||
path = trunc.group()
|
||||
url = f'{parsed.scheme}://{parsed.netloc}{path}'
|
||||
|
@ -865,8 +865,9 @@ class Note(Object, RawContentMixin):
|
|||
normalized_url = f'{parsed.scheme}://{parsed.netloc}{normalized_path.decode()}'
|
||||
links = {link['href'].lower(), unquote(link['href']).lower(), url, normalized_url}
|
||||
if links.intersection(hrefs):
|
||||
tag = re.match(r'#?([\w]+)', link.text).group(1).lower()
|
||||
link['data-hashtag'] = tag
|
||||
tag = re.match(r'^#?([\w\-]+$)', link.text)
|
||||
if tag:
|
||||
link['data-hashtag'] = tag.group(1).lower()
|
||||
|
||||
def _find_and_mark_mentions(self):
|
||||
mentions = [mention for mention in self.tag_objects if isinstance(mention, Mention)]
|
||||
|
|
|
@ -7,7 +7,7 @@ from bs4.element import NavigableString
|
|||
from commonmark import commonmark
|
||||
|
||||
ILLEGAL_TAG_CHARS = "!#$%^&*+.,@£/()=?`'\\{[]}~;:\"’”—\xa0"
|
||||
TAG_PATTERN = re.compile(r'(#[\w]+)', re.UNICODE)
|
||||
TAG_PATTERN = re.compile(r'(#[\w\-]+)([)\]_!?*%/.,;\s]+\s*|\Z)', re.UNICODE)
|
||||
# This will also match unbalanced braces (e.g. an opening `{` with no closing `}`). I don't think it's an issue.
|
||||
MENTION_PATTERN = re.compile(r'(@\{?(?:[\w\-. \u263a-\U0001f645]*; *)?[\w]+@[\w\-.]+\.[\w]+}?)', re.UNICODE)
|
||||
URL_PATTERN = re.compile(r'(^|[#*_\s])((?:https?://)?[\w\-.]+\.[\w]{1}[\w_\-.#?&/~@!$()*,;%=+]*)', re.UNICODE)
|
||||
|
@ -56,7 +56,8 @@ def find_elements(soup: BeautifulSoup, pattern: re.Pattern) -> List[NavigableStr
|
|||
ns = [NavigableString(r) for r in re.split(pattern, candidate.text)]
|
||||
if ns:
|
||||
candidate.replace_with(*ns)
|
||||
found.extend([child for child in parent.find_all(string=pattern) if child in ns])
|
||||
found.extend([child for child in parent.find_all(
|
||||
string=re.compile(r'\A'+pattern.pattern+r'\Z')) if child in ns])
|
||||
return found
|
||||
|
||||
|
||||
|
|
Ładowanie…
Reference in New Issue