kopia lustrzana https://github.com/jointakahe/takahe
Add language support to posts
rodzic
32216315aa
commit
808838707a
|
@ -0,0 +1,18 @@
|
|||
# Generated by Django 4.2.1 on 2023-05-15 09:26
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add a nullable ``language`` column (at most two characters) to ``post``."""

    # Must run after the previous migration of the "activities" app.
    dependencies = [("activities", "0016_index_together_migration")]

    operations = [
        migrations.AddField(
            model_name="post",
            name="language",
            # Nullable so that existing rows need no default value.
            field=models.CharField(null=True, max_length=2),
        ),
    ]
|
|
@ -31,6 +31,7 @@ from core.html import ContentRenderer, FediverseHtmlParser
|
|||
from core.ld import (
|
||||
canonicalise,
|
||||
format_ld_date,
|
||||
get_language,
|
||||
get_list,
|
||||
get_value_or_map,
|
||||
parse_ld_date,
|
||||
|
@ -252,6 +253,9 @@ class Post(StatorModel):
|
|||
# The main (HTML) content
|
||||
content = models.TextField()
|
||||
|
||||
# The language of the content
|
||||
language = models.CharField(max_length=2, null=True)
|
||||
|
||||
type = models.CharField(
|
||||
max_length=20,
|
||||
choices=Types.choices,
|
||||
|
@ -474,6 +478,7 @@ class Post(StatorModel):
|
|||
reply_to: Optional["Post"] = None,
|
||||
attachments: list | None = None,
|
||||
question: dict | None = None,
|
||||
language: str | None = None,
|
||||
) -> "Post":
|
||||
with transaction.atomic():
|
||||
# Find mentions in this post
|
||||
|
@ -492,6 +497,9 @@ class Post(StatorModel):
|
|||
sorted([tag[: Hashtag.MAXIMUM_LENGTH] for tag in parser.hashtags])
|
||||
or None
|
||||
)
|
||||
if language is None:
|
||||
language = author.config_identity.preferred_posting_language
|
||||
|
||||
# Make the Post object
|
||||
post = cls.objects.create(
|
||||
author=author,
|
||||
|
@ -502,6 +510,7 @@ class Post(StatorModel):
|
|||
visibility=visibility,
|
||||
hashtags=hashtags,
|
||||
in_reply_to=reply_to.object_uri if reply_to else None,
|
||||
language=language,
|
||||
)
|
||||
post.object_uri = post.urls.object_uri
|
||||
post.url = post.absolute_object_uri()
|
||||
|
@ -526,6 +535,7 @@ class Post(StatorModel):
|
|||
visibility: int = Visibilities.public,
|
||||
attachments: list | None = None,
|
||||
attachment_attributes: list | None = None,
|
||||
language: str | None = None,
|
||||
):
|
||||
with transaction.atomic():
|
||||
# Strip all HTML and apply linebreaks filter
|
||||
|
@ -538,6 +548,9 @@ class Post(StatorModel):
|
|||
self.summary = summary or None
|
||||
self.sensitive = bool(summary) if sensitive is None else sensitive
|
||||
self.visibility = visibility
|
||||
if language is None:
|
||||
language = self.author.config_identity.preferred_posting_language
|
||||
self.language = language
|
||||
self.edited = timezone.now()
|
||||
self.mentions.set(self.mentions_from_content(content, self.author))
|
||||
self.emojis.set(Emoji.emojis_from_content(content, None))
|
||||
|
@ -649,6 +662,10 @@ class Post(StatorModel):
|
|||
"tag": [],
|
||||
"attachment": [],
|
||||
}
|
||||
if self.language is not None:
|
||||
value["contentMap"] = {
|
||||
self.language: value["content"],
|
||||
}
|
||||
if self.type == Post.Types.question and self.type_data:
|
||||
value[self.type_data.mode] = [
|
||||
{
|
||||
|
@ -872,6 +889,7 @@ class Post(StatorModel):
|
|||
post.published = parse_ld_date(data.get("published"))
|
||||
post.edited = parse_ld_date(data.get("updated"))
|
||||
post.in_reply_to = data.get("inReplyTo")
|
||||
post.language = get_language(data)
|
||||
# Mentions and hashtags
|
||||
post.hashtags = []
|
||||
for tag in get_list(data, "tag"):
|
||||
|
@ -1112,6 +1130,7 @@ class Post(StatorModel):
|
|||
"created_at": format_ld_date(self.published),
|
||||
"account": self.author.to_mastodon_json(include_counts=False),
|
||||
"content": self.safe_content_remote(),
|
||||
"language": self.language,
|
||||
"visibility": visibility_mapping[self.visibility],
|
||||
"sensitive": self.sensitive,
|
||||
"spoiler_text": self.summary or "",
|
||||
|
@ -1152,7 +1171,6 @@ class Post(StatorModel):
|
|||
if isinstance(self.type_data, QuestionData)
|
||||
else None,
|
||||
"card": None,
|
||||
"language": None,
|
||||
"text": self.safe_content_remote(),
|
||||
"edited_at": format_ld_date(self.edited) if self.edited else None,
|
||||
}
|
||||
|
|
|
@ -151,7 +151,7 @@ class Status(Schema):
|
|||
reblog: Optional["Status"] = Field(...)
|
||||
poll: Poll | None = Field(...)
|
||||
card: None = Field(...)
|
||||
language: None = Field(...)
|
||||
language: str | None = Field(...)
|
||||
text: str | None = Field(...)
|
||||
edited_at: str | None
|
||||
favourited: bool = False
|
||||
|
|
|
@ -110,6 +110,7 @@ def post_status(request, details: PostStatusSchema) -> schemas.Status:
|
|||
reply_to=reply_post,
|
||||
attachments=attachments,
|
||||
question=details.poll.dict() if details.poll else None,
|
||||
language=details.language,
|
||||
)
|
||||
# Add their own timeline event for immediate visibility
|
||||
TimelineEvent.add_post(request.identity, post)
|
||||
|
@ -141,6 +142,7 @@ def edit_status(request, id: str, details: EditStatusSchema) -> schemas.Status:
|
|||
sensitive=details.sensitive,
|
||||
attachments=attachments,
|
||||
attachment_attributes=details.media_attributes,
|
||||
language=details.language,
|
||||
)
|
||||
return schemas.Status.from_post(post)
|
||||
|
||||
|
|
22
core/ld.py
22
core/ld.py
|
@ -1,5 +1,6 @@
|
|||
import datetime
|
||||
import os
|
||||
import re
|
||||
import urllib.parse as urllib_parse
|
||||
|
||||
from dateutil import parser
|
||||
|
@ -692,3 +693,24 @@ def media_type_from_filename(filename):
|
|||
return "image/webp"
|
||||
else:
|
||||
return "application/octet-stream"
|
||||
|
||||
|
||||
def get_language(data) -> str | None:
|
||||
"""Detects and returns a document's language"""
|
||||
map_ = None
|
||||
if "contentMap" in data:
|
||||
map_ = data["contentMap"]
|
||||
elif "nameMap" in data:
|
||||
map_ = data["nameMap"]
|
||||
elif "summaryMap" in data:
|
||||
map_ = data["summaryMap"]
|
||||
|
||||
if not map_:
|
||||
return None
|
||||
|
||||
lang = list(map_.keys())[0]
|
||||
if not lang or lang == "und":
|
||||
return None
|
||||
|
||||
lang = re.split("-|_", lang)[0]
|
||||
return lang.lower()
|
||||
|
|
|
@ -259,6 +259,7 @@ def test_content_map(remote_identity):
|
|||
create=True,
|
||||
)
|
||||
assert post.content == "Hi World"
|
||||
assert post.language is None
|
||||
|
||||
post2 = Post.by_ap(
|
||||
data={
|
||||
|
@ -271,6 +272,7 @@ def test_content_map(remote_identity):
|
|||
create=True,
|
||||
)
|
||||
assert post2.content == "Hey World"
|
||||
assert post2.language is None
|
||||
|
||||
post3 = Post.by_ap(
|
||||
data={
|
||||
|
@ -283,6 +285,7 @@ def test_content_map(remote_identity):
|
|||
create=True,
|
||||
)
|
||||
assert post3.content == "Hello World"
|
||||
assert post3.language == "en"
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
|
|
|
@ -2,7 +2,7 @@ import datetime
|
|||
|
||||
from dateutil.tz import tzutc
|
||||
|
||||
from core.ld import parse_ld_date
|
||||
from core.ld import get_language, parse_ld_date
|
||||
|
||||
|
||||
def test_parse_ld_date():
|
||||
|
@ -41,3 +41,41 @@ def test_parse_ld_date():
|
|||
tzinfo=tzutc(),
|
||||
)
|
||||
assert difference.total_seconds() == 0
|
||||
|
||||
|
||||
def test_get_language():
    """
    Checks map precedence (contentMap, then nameMap, then summaryMap),
    normalisation of the language tag, and the "no language" cases.
    """
    # (document, expected language) pairs
    detected_cases = [
        (
            {
                "contentMap": {
                    "en": "<p>Hello</p>",
                    "es": "<p>hola</p>",
                },
                "nameMap": {"de": "Hallo"},
                "summaryMap": {"fr": "Bonjour"},
            },
            "en",
        ),
        (
            {
                "nameMap": {"de": "Hallo"},
                "summaryMap": {"fr": "Bonjour"},
            },
            "de",
        ),
        (
            {
                "summaryMap": {"fr": "Bonjour"},
            },
            "fr",
        ),
        # Region subtags are dropped and the result is lower-cased
        ({"contentMap": {"en-gb": "<p>Hello</p>"}}, "en"),
        ({"contentMap": {"en_GB": "<p>Hello</p>"}}, "en"),
        ({"contentMap": {"EN": "<p>Hello</p>"}}, "en"),
    ]
    for document, expected in detected_cases:
        assert get_language(document) == expected

    # Undetermined language or no language map at all
    undetected_cases = [
        {"contentMap": {"und": "<p>Hello</p>"}},
        {},
    ]
    for document in undetected_cases:
        assert get_language(document) is None
|
||||
|
|
Ładowanie…
Reference in New Issue