# collections.py
#
# Part of kepi.
# Copyright (c) 2018-2020 Marnanel Thurman.
# Licensed under the GNU Public License v2.

import logging
logger = logging.getLogger(name="kepi")


class _LengthUnknown(ValueError, TypeError):
    """
    Raised by Collection.__len__ when the size isn't known.

    It is a ValueError because that is what __len__ here has
    always raised. It is also a TypeError because list(), sorted()
    and friends ask an iterable for its length as a size hint
    before iterating it, and carry on only if that request fails
    with a TypeError; any other exception aborts the iteration.
    """


def _items_from(found):
    """
    Return a new list of the members listed in "found" (a dict):
    whatever is in its 'items' field, followed by whatever is
    in its 'orderedItems' field.

    A missing or null field contributes nothing; a field holding
    a single value rather than a list contributes that one value.
    The lists inside "found" are neither modified nor shared.
    """
    result = []

    for fieldname in ['items', 'orderedItems']:
        value = found.get(fieldname)

        if value is None:
            continue

        if isinstance(value, list):
            result.extend(value)
        else:
            result.append(value)

    return result


class ErsatzModel(object):

    """
    Base class for objects which are fetched like models
    but are never stored: save() does nothing.
    """

    @classmethod
    def remote_form(cls):
        # This exists for compatibility with the models
        # defined in trilby_api.
        return cls

    def save(self):
        """
        Does nothing; there is nothing to store.
        """
        pass

    def __init__(self,
            url):

        self.url = url
        # NOTE(review): nothing in this module reads "status";
        # presumably the fetch code does. Confirm there.
        self.status = 0

    def update(self, found):
        """
        Update this object with information retrieved from
        the remote server. "found" is the decoded object.

        Subclasses override this; the base version does nothing.
        """
        pass


class _CollectionPage(ErsatzModel):

    """
    One page of a paged Collection. Only Collection's
    iterator uses this.
    """

    def update(self, found):
        """
        Record the members of this page, and the address
        of the page after it (None if "found" has no 'next').
        """
        self.items = _items_from(found)

        self.next = found.get('next', None)


class Collection(ErsatzModel):

    """
    Used in ActivityPub to represent a collection of objects.
    It's not helpful to represent this as a Django model, because
    it can change so unpredictably from moment to moment.

    Collections can be iterated over. The position of the
    iteration is kept on the Collection itself, so a Collection
    supports one iteration at a time; starting another one
    restarts from the beginning.

    Some Collections are paged: in these cases
    the main object contains a pointer to a series of
    CollectionPages. We don't represent CollectionPages
    directly, but the iterator knows how to handle them.

    A Collection can be ordered or unordered. We treat these
    identically at present.

    The behaviour of Collections in general is defined at
    https://www.w3.org/TR/activitystreams-core/#paging ,
    and their use in ActivityPub is defined at
    https://www.w3.org/TR/activitypub/#collections .
    """

    def update(self, found):
        """
        Update this Collection with information retrieved from
        the remote server.

        "found" is the decoded object. It isn't modified.

        Raises KeyError if "found" has no 'type' or no 'id'.
        Raises ValueError if its type isn't Collection or
        OrderedCollection, or if its id isn't our URL.
        """

        if found['type'] not in ['Collection', 'OrderedCollection']:
            raise ValueError("Type %s isn't a collection" % (
                found['type'],))

        if found['id']!=self.url:
            raise ValueError("id mismatch: wanted %s, got %s" % (
                self.url, found['id']))

        # Fields which the server left out are set to None,
        # so that they can always be read after an update.
        for fieldname in [
                'totalItems', 'first',
                'prev', 'next',
                ]:
            setattr(self, fieldname, found.get(fieldname))

        self.items = _items_from(found)

    def __len__(self):
        """
        Return the number of members which the server claimed
        in totalItems. This is the server's claim; it isn't
        checked against the members actually listed.

        Raises an exception which is both a ValueError and a
        TypeError if we don't have a usable totalItems.
        """

        total = getattr(self, 'totalItems', None)

        if total is None:
            raise _LengthUnknown("%s: totalItems wasn't supplied" % (
                self.url,))

        if not isinstance(total, int) or total < 0:
            # len() would reject this anyway, and less helpfully.
            raise _LengthUnknown("%s: totalItems is unusable: %r" % (
                self.url, total))

        return total

    def __iter__(self):
        """
        Begin (or restart) iterating over the members.
        Returns this Collection, which is its own iterator.
        """

        try:
            # A copy, because __next__ consumes the list.
            self._iter_items = list(self.items)
            self._next_page = self.first

        except AttributeError:
            # update() has never been called.
            self._iter_items = None
            self._next_page = None
            logger.info("%s: iteration: no content loaded", self.url)

        else:
            logger.debug("%s: iteration: begin with %s",
                    self.url, self._iter_items)

        # Addresses of the pages fetched during this iteration.
        self._seen_pages = []

        return self

    def __next__(self):
        """
        Return the next member, fetching further pages from
        the remote server as they are needed.

        Raises StopIteration when there are no more members.
        """

        # This is a loop because a page may be empty and
        # still point to a following page.
        while not getattr(self, '_iter_items', None):

            address = getattr(self, '_next_page', None)

            if address is None:
                logger.debug("%s: iteration: finished!",
                        self.url)
                raise StopIteration

            seen = self.__dict__.setdefault('_seen_pages', [])

            if address in seen:
                # The pages form a loop; without this check
                # the iteration would never end.
                logger.warning(
                        "%s: iteration: %s was already visited; stopping",
                        self.url, address)
                self._next_page = None
                raise StopIteration

            seen.append(address)

            logger.debug("%s: iteration: fetching %s...",
                    self.url, address)

            page = self._fetch_page(address)

            # NOTE(review): getattr, because a page which was never
            # updated has neither attribute; presumably that's what
            # an unsuccessful fetch gives us. Confirm against fetch().
            self._iter_items = list(getattr(page, 'items', None) or [])
            self._next_page = getattr(page, 'next', None)

            logger.debug(' -- containing %s',
                    self._iter_items)

        return self._iter_items.pop(0)

    def _fetch_page(self, address):
        """
        Fetch and return the page at "address".
        """

        # Imported here rather than at the top of the module:
        # fetch.py imports this module, so importing it from
        # the top would be circular.
        import kepi.sombrero_sendpub.fetch as fetch

        return fetch.fetch(
                address,
                expected_type = _CollectionPage,
                )
def on_collection(found, obj):
    """
    Handler for a collection retrieved from a remote server.

    Passes "found" to obj.update(), and returns "obj".
    """
    obj.update(found)
    return obj

# The same handler serves every kind of collection and page.
#
# NOTE(review): the code which looks these handlers up isn't
# visible here. The names of the two Ordered aliases suggest
# that the lookup uses the lowercased type name; if so,
# "on_collection_page" was never found for a CollectionPage.
# "on_collectionpage" is added to match, and the old name is
# kept in case anything refers to it. Confirm against the lookup.
on_collection_page = on_collection
on_collectionpage = on_collection
on_orderedcollection = on_collection
on_orderedcollectionpage = on_collection
# A collection which lists all its members itself.
EXAMPLE_SIMPLE_COLLECTION_MEMBERS = [
        'apple', 'banana',
        'coconut', 'damson', 'elderberry',
        ]
EXAMPLE_SIMPLE_COLLECTION_URL = "https://example.com/fruits"
EXAMPLE_SIMPLE_COLLECTION = """{
"@context":"https://www.w3.org/ns/activitystreams",
"id":"%(url)s",
"type":"OrderedCollection",
"totalItems":5,
"orderedItems": ["apple", "banana", "coconut", "damson", "elderberry"]
}""" % {
        'url': EXAMPLE_SIMPLE_COLLECTION_URL,
        }

# A collection which lists no members itself, but points to
# the first of two pages. The pages don't serve the members
# in sorted order, so this list can only be compared with
# the sorted result of an iteration.
EXAMPLE_COMPLEX_COLLECTION_MEMBERS = sorted([
    "Bolton", "Bury", "Oldham", "Manchester", "Rochdale",
    "Salford", "Stockport", "Tameside", "Trafford", "Wigan"
    ])
EXAMPLE_COMPLEX_COLLECTION_URL = "https://example.com/boroughs"
EXAMPLE_COMPLEX_COLLECTION = """{
"@context":"https://www.w3.org/ns/activitystreams",
"id":"%(url)s",
"type":"OrderedCollection",
"totalItems":10,
"first":"%(url)s/1"
}""" % {
        'url': EXAMPLE_COMPLEX_COLLECTION_URL,
        }

EXAMPLE_COMPLEX_COLLECTION_PAGE_1 = """{
"@context":"https://www.w3.org/ns/activitystreams",
"id":"%(url)s/1",
"type":"OrderedCollectionPage",
"totalItems":10,
"partOf":"%(url)s",
"next":"%(url)s/2",
"orderedItems": [
    "Bolton", "Bury", "Oldham", "Manchester", "Rochdale"
    ]
}""" % {
        'url': EXAMPLE_COMPLEX_COLLECTION_URL,
        }

# The last page: it has no "next", and its "prev" is page 1.
EXAMPLE_COMPLEX_COLLECTION_PAGE_2 = """{
"@context":"https://www.w3.org/ns/activitystreams",
"id":"%(url)s/2",
"type":"OrderedCollectionPage",
"totalItems":10,
"partOf":"%(url)s",
"prev":"%(url)s/1",
"orderedItems": [
    "Salford", "Stockport", "Tameside", "Trafford", "Wigan"
    ]
}""" % {
        'url': EXAMPLE_COMPLEX_COLLECTION_URL,
        }
class TestFetchRemoteUser(TestCase):

    # NOTE(review): only the collection tests of this class
    # are visible in this excerpt.

    def _serve_activity_json(self, url, body):
        """
        Have httpretty answer GET requests for "url" with "body",
        served with status 200 as application/activity+json.
        """
        httpretty.register_uri(
                'GET',
                url,
                status=200,
                headers = {
                    'Content-Type': 'application/activity+json',
                    },
                body = body,
                )

    def _assert_collection_holds(self, collection, members):
        """
        Check that "collection" yields exactly "members", in
        any order, on each of two successive iterations, and
        that its length is the number of members.

        "members" must already be sorted.
        """
        self.assertEqual(
                sorted(collection),
                members,
                msg="Collection can be iterated")

        self.assertEqual(
                sorted(collection),
                members,
                msg="Collection can be iterated twice")

        self.assertEqual(
                len(collection),
                len(members),
                msg="Collection has a length")

    @httpretty.activate
    def test_fetch_simple_collection(self):
        """
        A collection which lists its own members can be fetched.
        """
        self._serve_activity_json(
                EXAMPLE_SIMPLE_COLLECTION_URL,
                EXAMPLE_SIMPLE_COLLECTION,
                )

        collection = fetch(EXAMPLE_SIMPLE_COLLECTION_URL,
                expected_type = Collection)

        self._assert_collection_holds(
                collection,
                EXAMPLE_SIMPLE_COLLECTION_MEMBERS,
                )

        # This is an OrderedCollection, and the members list is
        # written in the order in which the fixture serves them,
        # so we can also check that the order survives.
        self.assertEqual(
                list(collection),
                EXAMPLE_SIMPLE_COLLECTION_MEMBERS,
                msg="Collection is iterated in the order served")

    @httpretty.activate
    def test_fetch_complex_collection(self):
        """
        A collection whose members are spread over two pages
        can be fetched, and iterating it follows the pages.
        """
        for url, body in [
                (EXAMPLE_COMPLEX_COLLECTION_URL,
                    EXAMPLE_COMPLEX_COLLECTION),
                (EXAMPLE_COMPLEX_COLLECTION_URL+"/1",
                    EXAMPLE_COMPLEX_COLLECTION_PAGE_1),
                (EXAMPLE_COMPLEX_COLLECTION_URL+"/2",
                    EXAMPLE_COMPLEX_COLLECTION_PAGE_2),
                ]:
            self._serve_activity_json(url, body)

        collection = fetch(EXAMPLE_COMPLEX_COLLECTION_URL,
                expected_type = Collection)

        self._assert_collection_holds(
                collection,
                EXAMPLE_COMPLEX_COLLECTION_MEMBERS,
                )