From 226a60bec7f3009bbe9a759342e1c181c90b7311 Mon Sep 17 00:00:00 2001 From: Humberto Rocha Date: Sat, 24 Jun 2023 10:53:42 -0400 Subject: [PATCH] Fix canonicalize (#590) --- core/ld.py | 52 +++++++++++++++++++++---------- tests/core/test_ld.py | 67 +++++++++++++++++++++++++++++++++++++++- users/models/identity.py | 10 +++--- 3 files changed, 106 insertions(+), 23 deletions(-) diff --git a/core/ld.py b/core/ld.py index 8bbc088..6baac29 100644 --- a/core/ld.py +++ b/core/ld.py @@ -547,6 +547,18 @@ schemas = { } }, }, + "schema.org": { + "contentType": "application/ld+json", + "documentUrl": "https://schema.org/docs/jsonldcontext.json", + "contextUrl": None, + "document": { + "@context": { + "schema": "http://schema.org/", + "PropertyValue": {"@id": "schema:PropertyValue"}, + "value": {"@id": "schema:value"}, + }, + }, + }, } DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.Z" @@ -592,24 +604,32 @@ def canonicalise(json_data: dict, include_security: bool = False) -> dict: """ if not isinstance(json_data, dict): raise ValueError("Pass decoded JSON data into LDDocument") - context = [ - "https://www.w3.org/ns/activitystreams", - { - "blurhash": "toot:blurhash", - "Emoji": "toot:Emoji", - "focalPoint": {"@container": "@list", "@id": "toot:focalPoint"}, - "Hashtag": "as:Hashtag", - "manuallyApprovesFollowers": "as:manuallyApprovesFollowers", - "sensitive": "as:sensitive", - "toot": "http://joinmastodon.org/ns#", - "votersCount": "toot:votersCount", - "featured": {"@id": "toot:featured", "@type": "@id"}, - }, - ] + + context = json_data.get("@context", []) + + if not isinstance(context, list): + context = [context] + + if not context: + context.append("https://www.w3.org/ns/activitystreams") + context.append( + { + "blurhash": "toot:blurhash", + "Emoji": "toot:Emoji", + "focalPoint": {"@container": "@list", "@id": "toot:focalPoint"}, + "Hashtag": "as:Hashtag", + "manuallyApprovesFollowers": "as:manuallyApprovesFollowers", + "sensitive": "as:sensitive", + "toot": "http://joinmastodon.org/ns#", + "votersCount": "toot:votersCount", + "featured": {"@id": "toot:featured", "@type": "@id"}, + } + ) + if include_security: context.append("https://w3id.org/security/v1") - if "@context" not in json_data: - json_data["@context"] = context + + json_data["@context"] = context return jsonld.compact(jsonld.expand(json_data), context) diff --git a/tests/core/test_ld.py b/tests/core/test_ld.py index 4f12317..3579f86 100644 --- a/tests/core/test_ld.py +++ b/tests/core/test_ld.py @@ -2,7 +2,7 @@ import datetime from dateutil.tz import tzutc -from core.ld import parse_ld_date +from core.ld import canonicalise, parse_ld_date def test_parse_ld_date(): @@ -41,3 +41,68 @@ def test_parse_ld_date(): tzinfo=tzutc(), ) assert difference.total_seconds() == 0 + + +def test_canonicalise_single_attachment(): + data = { + "@context": [ + "https://www.w3.org/ns/activitystreams", + { + "schema": "http://schema.org#", + "PropertyValue": "schema:PropertyValue", + "value": "schema:value", + }, + ], + "attachment": [ + { + "type": "http://schema.org#PropertyValue", + "name": "Location", + "http://schema.org#value": "Test Location", + }, + ], + } + + parsed = canonicalise(data) + attachment = parsed["attachment"] + + assert attachment["type"] == "PropertyValue" + assert attachment["name"] == "Location" + assert attachment["value"] == "Test Location" + + +def test_canonicalise_multiple_attachment(): + data = { + "@context": [ + "https://www.w3.org/ns/activitystreams", + { + "schema": "http://schema.org#", + "PropertyValue": "schema:PropertyValue", + "value": "schema:value", + }, + ], + "attachment": [ + { + "type": "http://schema.org#PropertyValue", + "name": "Attachment 1", + "http://schema.org#value": "Test 1", + }, + { + "type": "http://schema.org#PropertyValue", + "name": "Attachment 2", + "http://schema.org#value": "Test 2", + }, + ], + } + + parsed = canonicalise(data) + attachment = parsed["attachment"] + + assert len(attachment) == 2 + + assert attachment[0]["type"] == "PropertyValue" + assert attachment[0]["name"] == "Attachment 1" + assert attachment[0]["value"] == "Test 1" + + assert attachment[1]["type"] == "PropertyValue" + assert attachment[1]["name"] == "Attachment 2" + assert attachment[1]["value"] == "Test 2" diff --git a/users/models/identity.py b/users/models/identity.py index 69283c6..12cf665 100644 --- a/users/models/identity.py +++ b/users/models/identity.py @@ -854,16 +854,14 @@ class Identity(StatorModel): self.metadata = [] for attachment in get_list(document, "attachment"): if ( - attachment["type"] == "http://schema.org#PropertyValue" + attachment["type"] == "PropertyValue" and "name" in attachment - and "http://schema.org#value" in attachment + and "value" in attachment ): self.metadata.append( { - "name": attachment.get("name"), - "value": FediverseHtmlParser( - attachment.get("http://schema.org#value") - ).html, + "name": attachment["name"], + "value": FediverseHtmlParser(attachment["value"]).html, } ) # Now go do webfinger with that info to see if we can get a canonical domain