Extract json parser to core and use in fetch_actor (#663)

2023-11-20 13:46:51 -05:00 · 2023-11-20 13:46:51 -05:00 · b031880e41
commit b031880e41
--- a/activities/services/search.py
+++ b/activities/services/search.py
@ -1,8 +1,7 @@
-import json
-
 import httpx

 from activities.models import Hashtag, Post
+from core.json import json_from_response
 from core.ld import canonicalise
 from users.models import Domain, Identity, IdentityStates
 from users.models.system_actor import SystemActor
@ -17,32 +16,6 @@ class SearchService:
        self.query = query.strip()
        self.identity = identity

-    def _json(self, response: httpx.Response) -> dict | None:
-        content_type, *parameters = (
-            response.headers.get("Content-Type", "invalid").lower().split(";")
-        )
-
-        if content_type not in [
-            "application/json",
-            "application/ld+json",
-            "application/activity+json",
-        ]:
-            return None
-
-        charset = None
-
-        for parameter in parameters:
-            key, value = parameter.split("=")
-            if key.strip() == "charset":
-                charset = value.strip()
-
-        if charset:
-            return json.loads(response.content.decode(charset))
-        else:
-            # if no charset informed, default to
-            # httpx json encoding inference
-            return response.json()
-
    def search_identities_handle(self) -> set[Identity]:
        """
        Searches for identities by their handles
@ -110,7 +83,7 @@ class SearchService:
        if response.status_code >= 400:
            return None

-        json_data = self._json(response)
+        json_data = json_from_response(response)
        if not json_data:
            return None

--- a/core/json.py
+++ b/core/json.py
@ -0,0 +1,52 @
+import json
+
+from httpx import Response
+
+JSON_CONTENT_TYPES = [
+    "application/json",
+    "application/ld+json",
+    "application/activity+json",
+]
+
+
+def json_from_response(response: Response) -> dict | None:
+    """
+    Parses the body of an HTTP response as JSON.
+
+    Returns None when the Content-Type header is missing or is not one of
+    JSON_CONTENT_TYPES. When the header carries a charset parameter the
+    body is decoded with it; otherwise (or when the charset is unknown to
+    Python) httpx's own encoding inference is used.
+
+    Raises ValueError (json.JSONDecodeError or UnicodeDecodeError) when the
+    body cannot be decoded or is not valid JSON.
+    """
+    content_type, *parameters = (
+        response.headers.get("Content-Type", "invalid").lower().split(";")
+    )
+
+    # Tolerate optional whitespace around the media type, e.g.
+    # "application/json ; charset=utf-8"
+    if content_type.strip() not in JSON_CONTENT_TYPES:
+        return None
+
+    charset = None
+
+    for parameter in parameters:
+        # partition() never fails to unpack, unlike split("="), so empty
+        # parameters (trailing ";") and value-less ones are simply skipped
+        key, _, value = parameter.partition("=")
+        if key.strip() == "charset":
+            # the value may legally be a quoted-string: charset="utf-8"
+            charset = value.strip().strip("\"'")
+
+    if charset:
+        try:
+            return json.loads(response.content.decode(charset))
+        except LookupError:
+            # charset not known to Python: fall through to inference
+            pass
+
+    # if no usable charset informed, default to
+    # httpx json for encoding inference
+    return response.json()
--- a/tests/api/test_search.py
+++ b/tests/api/test_search.py
@ -44,7 +44,7 @@ test_account_json = r"""
   "featuredTags":"https://search.example.com/users/searchtest/collections/tags",
   "preferredUsername":"searchtest",
   "name":"searchtest",
-   "summary":"<p>The official searchtest account for the instance.</p>",
+   "summary":"<p>Just a test (àáâãäåæ)</p>",
   "url":"https://search.example.com/@searchtest",
   "manuallyApprovesFollowers":false,
   "discoverable":true,
@ -113,3 +113,4 @@ def test_search(
    assert len(response["accounts"]) == 1
    assert response["accounts"][0]["acct"] == "searchtest@search.example.com"
    assert response["accounts"][0]["username"] == "searchtest"
+    assert response["accounts"][0]["note"] == "<p>Just a test (àáâãäåæ)</p>"
--- a/users/models/identity.py
+++ b/users/models/identity.py
@ -14,6 +14,7 @@ from lxml import etree

 from core.exceptions import ActorMismatchError
 from core.html import ContentRenderer, FediverseHtmlParser
+from core.json import json_from_response
 from core.ld import (
    canonicalise,
    format_ld_date,
@ -878,8 +879,11 @@ class Identity(StatorModel):
                    "Client error fetching actor: %d %s", status_code, self.actor_uri
                )
            return False
+        json_data = json_from_response(response)
+        if not json_data:
+            return False
        try:
-            document = canonicalise(response.json(), include_security=True)
+            document = canonicalise(json_data, include_security=True)
        except ValueError:
            # servers with empty or invalid responses are inevitable
            logger.info(