kopia lustrzana https://github.com/halcy/Mastodon.py
add status length counter
rodzic
974844bacb
commit
2174694464
|
@ -16,6 +16,7 @@ v2.1.0 (IN PROGRESS)
|
|||
* Added pagination iteration via `pagination_iterator` (Thanks @FredericoCeratto for the suggestion)
|
||||
* Added a way to get pagination info out of lists that is slightly less digging-around-in-internals via `get_pagination_info` (Thanks @s427 for the inciting report)
|
||||
* Added missing `replies_policy` and `exclusive` parameters to list creation and update methods.
|
||||
* Added status length counter `get_status_length` (Thanks @yuletide for the suggestion)
|
||||
|
||||
v2.0.1
|
||||
------
|
||||
|
|
|
@ -33,3 +33,4 @@ Cache control
|
|||
Other utilities
|
||||
---------------
|
||||
.. automethod:: Mastodon.get_approx_server_time
|
||||
.. automethod:: Mastodon.get_status_length
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -45,3 +45,9 @@ except:
|
|||
class Path:
|
||||
pass
|
||||
|
||||
# `grapheme` is an optional dependency. IMPL_HAS_GRAPHEME records whether it
# could be imported so that callers can check the flag before touching the
# module; when the import fails, `grapheme` is bound to None so the name
# always exists.
IMPL_HAS_GRAPHEME = True
try:
    import grapheme
except ImportError:
    # Only a failed import means "feature unavailable". A bare `except:` would
    # also swallow KeyboardInterrupt / SystemExit and hide unrelated errors.
    IMPL_HAS_GRAPHEME = False
    grapheme = None
|
||||
|
|
|
@ -7,7 +7,7 @@ import copy
|
|||
import warnings
|
||||
|
||||
from mastodon.errors import MastodonAPIError, MastodonIllegalArgumentError, MastodonNotFoundError, MastodonVersionError
|
||||
from mastodon.compat import IMPL_HAS_BLURHASH, blurhash
|
||||
from mastodon.compat import IMPL_HAS_BLURHASH, blurhash, IMPL_HAS_GRAPHEME, grapheme
|
||||
from mastodon.internals import Mastodon as Internals
|
||||
|
||||
from mastodon.versions import parse_version_string, max_version, api_version
|
||||
|
@ -16,8 +16,8 @@ from typing import Optional, Union, Dict, Iterator
|
|||
from mastodon.return_types import PaginatableList, PaginationInfo, PaginatableList
|
||||
from mastodon.types_base import Entity, try_cast
|
||||
|
||||
# Class level:
|
||||
|
||||
from ._url_regex import url_regex
|
||||
import unicodedata
|
||||
|
||||
class Mastodon(Internals):
|
||||
def set_language(self, lang):
|
||||
|
@ -320,3 +320,30 @@ class Mastodon(Internals):
|
|||
current_page = self.fetch_next(current_page)
|
||||
else:
|
||||
current_page = self.fetch_previous(current_page)
|
||||
|
||||
@staticmethod
def get_status_length(text: str, spoiler_text: str = "") -> int:
    """
    For a given status `text` and `spoiler_text`, return how many characters this status counts as
    when computing the status length and comparing it against the limit.

    The result is the number of grapheme clusters (as counted by the `grapheme` module) in the
    transformed `text` plus the number of grapheme clusters in `spoiler_text`. Before counting,
    `text` is transformed as follows:

    * Every match of `url_regex` is replaced by group 2 of the match followed by a 23 character
      placeholder, so a matched URL always contributes 23 characters regardless of its real length.
    * Remote mentions of the form `@user@domain` are shortened to `@user`. The character in front
      of the mention is kept as it was.

    `spoiler_text` is counted as-is. Passing `None` for it is tolerated and counts as zero.

    Note that there are other limits you may run into, such as the maximum length of a URL, or the
    maximum length of a username's domain part. But as long as you do *normal* things, this function
    will return the correct length for the status text.

    Raises `NotImplementedError` if the optional `grapheme` module is not installed.
    """
    if not IMPL_HAS_GRAPHEME:
        raise NotImplementedError(
            'To use the get_status_length function, please install the grapheme Python module.')

    # Group 1: start of string or a character that is neither "/" nor a word character.
    # Group 2: the full "user@domain" part. Group 3: the local "user" part only.
    username_regex = re.compile(r'(^|[^/\w])@(([a-z0-9_]+)@[a-z0-9\.\-]+[a-z0-9]+)', re.IGNORECASE)

    def countable_text(input_text: str) -> str:
        # Transform text such that it has the correct length for counting
        # post text lengths against the limit
        def _url_repl(m: re.Match) -> str:
            # NOTE(review): group 2 is presumably whatever precedes the URL inside the
            # match - confirm against the pattern in `_url_regex`.
            return m.group(2) + ("x" * 23)

        text = url_regex.sub(_url_repl, input_text)
        # Keep the preceding character (group 1) and the local part (group 3), drop the domain.
        text = username_regex.sub(r'\1@\3', text)
        return text

    # `or ""` lets callers hand over a missing (None) spoiler text without crashing.
    return grapheme.length(countable_text(text)) + grapheme.length(spoiler_text or "")
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
|
||||
import pytest
|
||||
|
||||
from mastodon import Mastodon
|
||||
|
||||
# Table of (status text, expected counted length) pairs consumed by the
# parametrized ground-truth test for `Mastodon.get_status_length`.
#
# Sequences that contain invisible code points (zero-width joiners) are written
# with explicit escapes so they cannot silently get lost when the file is
# copied, rendered or re-encoded.
TEST_CASES = [
    # Simple
    ("", 0),
    ("hello", 5),
    # Two leading and two trailing spaces: 27 visible characters + 4 spaces = 31.
    ("  leading and trailing spaces  ", 31),
    (" tabs\tand\nnewlines\r\n", 19),

    # URLs - schemes, TLDs, IPv4/IPv6, ports, creds
    ("check http://example.com and https://example.org/page?x=1#frag", 1000 - 943),
    ("ftp://files.example.net/resource", 1000 - 968),
    ("http://user:pass@example.com:8080/path", 1000 - 962),
    ("http://127.0.0.1:3000/health", 1000 - 972),
    ("https://[2001:db8::1]/status", 1000 - 972),
    ("https://[2001:db8:85a3::8a2e:370:7334]:443/path?ok=1", 1000 - 948),
    ("mailto:someone@example.com", 1000 - 974),
    ("git+ssh://git@example.co.uk:22/repo.git", 1000 - 961),
    ("https://very.long.tld.example.museum/collection/item", 1000 - 977),

    # Usernames - local and remote
    ("@alice", 6),
    ("@bob@example.com", 4),
    ("hi @charlie and @dora@example.social!", 1000 - 978),

    # Mixed
    # NOTE(review): the expected value implies one more whitespace character than
    # the single-spaced text contains; it is restored here as a second trailing
    # space - confirm the exact position against the upstream test file.
    ("hey @me@example.com look at https://example.com/a-b_c~d?e=f#g and @you  ", 50),

    # Grapheme cluster vs code point differences
    ("a: 🇪🇪", 4),
    # Family emoji: man + ZWJ + woman + ZWJ + girl + ZWJ + boy (one cluster).
    ("b: \U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466", 4),
    # Woman technologist, medium skin tone: woman + skin tone + ZWJ + laptop (one cluster).
    ("c: \U0001F469\U0001F3FD\u200D\U0001F4BB", 4),
    ("d: ✊🏿", 4),
    ("é", 1),
    ("f\u0301", 1),

    # Stress-tests
    ("https://sub.sub2.пример.рф/путь/страница?параметр=значение#якорь", 47),
    ("clusters: 😀😃😄😁😆😅😂🤣😊🙂😉🙃😇🥰😍🤩😘😗😙😚", 30),

    # Varied compositions
    ("See: http://example.com https://[2001:db8::2]:8443/a ftp://user:pw@files.example.org:21/x http://192.168.0.1/", 1000 - 886),
    ("@one https://example.social/@two \U0001F469\U0001F3FD\u200D\U0001F4BB 🇪🇪 @three@example.com ✊🏿", 1000 - 959),

    # Edge punctuation around URLs/usernames
    ("(see https://example.com.)", 30),
    ("[link: http://user:pass@host.example:8080/path?x=y#z]", 1000 - 947),
    ("<@root> and {@admin@example.net}", 20),
    # The query separator is a plain "&" ("&para" must not be decoded as an HTML entity).
    ("https://example.com/a-b_c~d?param_a=1&param-b=2", 1000 - 977),
]
|
||||
|
||||
@pytest.mark.parametrize("text,expected", TEST_CASES)
def test_get_status_length_against_ground_truth(text, expected):
    """
    Check `Mastodon.get_status_length` against every entry of `TEST_CASES`.

    Each case is checked twice: once with the status text alone, and once with
    the four character spoiler text "what", which must add exactly four to the
    counted length.
    """
    length_without_spoiler = Mastodon.get_status_length(text)
    assert length_without_spoiler == expected

    length_with_spoiler = Mastodon.get_status_length(text, "what")
    assert length_with_spoiler == expected + 4
|
2
tox.ini
2
tox.ini
|
@ -4,5 +4,5 @@ skipsdist = true
|
|||
|
||||
|
||||
[testenv]
|
||||
deps = .[test,webpush,blurhash]
|
||||
deps = .[test,webpush,blurhash,grapheme]
|
||||
commands = python setup.py test
|
||||
|
|
Ładowanie…
Reference in New Issue