From 81d019ad0db2daeae63f856c7f483f83229d98ad Mon Sep 17 00:00:00 2001 From: Humberto Rocha Date: Sun, 19 Nov 2023 13:32:35 -0500 Subject: [PATCH] Improve search api json parsing (#662) --- activities/services/search.py | 40 +++++++++--- tests/api/test_search.py | 115 ++++++++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 7 deletions(-) create mode 100644 tests/api/test_search.py diff --git a/activities/services/search.py b/activities/services/search.py index 35c5e80..4807f24 100644 --- a/activities/services/search.py +++ b/activities/services/search.py @@ -1,3 +1,5 @@ +import json + import httpx from activities.models import Hashtag, Post @@ -15,6 +17,32 @@ class SearchService: self.query = query.strip() self.identity = identity + def _json(self, response: httpx.Response) -> dict | None: + content_type, *parameters = ( + response.headers.get("Content-Type", "invalid").lower().split(";") + ) + + if content_type not in [ + "application/json", + "application/ld+json", + "application/activity+json", + ]: + return None + + charset = None + + for parameter in parameters: + key, value = parameter.split("=") + if key.strip() == "charset": + charset = value.strip() + + if charset: + return json.loads(response.content.decode(charset)) + else: + # if no charset informed, default to + # httpx json encoding inference + return response.json() + def search_identities_handle(self) -> set[Identity]: """ Searches for identities by their handles @@ -81,14 +109,12 @@ class SearchService: return None if response.status_code >= 400: return None - content_type = response.headers.get("Content-Type", "").lower() - if content_type not in [ - "application/json", - "application/ld+json", - "application/activity+json", - ]: + + json_data = self._json(response) + if not json_data: return None - document = canonicalise(response.json(), include_security=True) + + document = canonicalise(json_data, include_security=True) type = document.get("type", "unknown").lower() # Is it an 
# tests/api/test_search.py
import pytest
from pytest_httpx import HTTPXMock

# Actor document returned by the mocked remote instance in test_search.
# Every string value must stay on a single line: raw line breaks inside a
# JSON string make the whole fixture unparseable.
# NOTE(review): the <p>...</p> markup in "summary" is reconstructed - confirm
# against the upstream fixture.
test_account_json = r"""
{
  "@context":[
    "https://www.w3.org/ns/activitystreams",
    "https://w3id.org/security/v1",
    {
      "manuallyApprovesFollowers":"as:manuallyApprovesFollowers",
      "toot":"http://joinmastodon.org/ns#",
      "featured":{
        "@id":"toot:featured",
        "@type":"@id"
      },
      "featuredTags":{
        "@id":"toot:featuredTags",
        "@type":"@id"
      },
      "movedTo":{
        "@id":"as:movedTo",
        "@type":"@id"
      },
      "schema":"http://schema.org#",
      "PropertyValue":"schema:PropertyValue",
      "value":"schema:value",
      "discoverable":"toot:discoverable",
      "Device":"toot:Device",
      "deviceId":"toot:deviceId",
      "messageType":"toot:messageType",
      "cipherText":"toot:cipherText",
      "suspended":"toot:suspended",
      "memorial":"toot:memorial",
      "indexable":"toot:indexable"
    }
  ],
  "id":"https://search.example.com/users/searchtest",
  "type":"Person",
  "following":"https://search.example.com/users/searchtest/following",
  "followers":"https://search.example.com/users/searchtest/followers",
  "inbox":"https://search.example.com/users/searchtest/inbox",
  "outbox":"https://search.example.com/users/searchtest/outbox",
  "featured":"https://search.example.com/users/searchtest/collections/featured",
  "featuredTags":"https://search.example.com/users/searchtest/collections/tags",
  "preferredUsername":"searchtest",
  "name":"searchtest",
  "summary":"<p>The official searchtest account for the instance.</p>",
  "url":"https://search.example.com/@searchtest",
  "manuallyApprovesFollowers":false,
  "discoverable":true,
  "indexable":false,
  "published":"2018-05-09T00:00:00Z",
  "memorial":false,
  "devices":"https://search.example.com/users/searchtest/collections/devices",
  "endpoints":{
    "sharedInbox":"https://search.example.com/inbox"
  }
}
"""


@pytest.mark.django_db
def test_search_not_found(httpx_mock: HTTPXMock, api_client):
    """
    A 404 from the remote server yields empty result lists
    """
    httpx_mock.add_response(status_code=404)
    response = api_client.get(
        "/api/v2/search",
        content_type="application/json",
        data={
            "q": "https://notfound.example.com",
        },
    ).json()

    assert response["accounts"] == []
    assert response["statuses"] == []
    assert response["hashtags"] == []


@pytest.mark.django_db
@pytest.mark.parametrize(
    "encoding",
    [
        "utf-8",
        "iso-8859-1",
    ],
)
@pytest.mark.parametrize(
    "content_type",
    [
        "application/json",
        "application/ld+json",
        "application/activity+json",
    ],
)
def test_search(
    content_type: str,
    encoding: str,
    httpx_mock: HTTPXMock,
    api_client,
):
    """
    A remote actor is found for every accepted JSON content type, with the
    body encoded in the charset announced by the Content-Type header
    """
    httpx_mock.add_response(
        headers={"Content-Type": f"{content_type}; charset={encoding}"},
        content=test_account_json.encode(encoding),
    )

    response = api_client.get(
        "/api/v2/search",
        content_type="application/json",
        data={
            "q": "https://search.example.com/users/searchtest",
        },
    ).json()

    assert len(response["accounts"]) == 1
    assert response["accounts"][0]["acct"] == "searchtest@search.example.com"
    assert response["accounts"][0]["username"] == "searchtest"