add pagination info retrieval, iterator

2025-08-16 19:01:08 +03:00 · 2025-08-16 19:01:08 +03:00 · e757aa727e
commit e757aa727e
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@ -13,6 +13,8 @@ v2.1.0 (IN PROGRESS)
 * Added support for retrieving OAuth authorization server info
 * Added check for supported password grant type based on authorization server info (Thanks @thisismissem for the suggestion)
 * Added support for alternate OAuth URLs based on the authorization server info
+* Added pagination iteraton via `pagination_iterator` (Thanks @FredericoCeratto for the suggestion)
+* Added a way to get pagination info out of lists that is slightly less digging-around-in-internals via `get_pagination_info` (Thanks @s427 for the inciting report)

 v2.0.1
 ------
--- a/docs/12_utilities.rst
+++ b/docs/12_utilities.rst
@ -14,7 +14,11 @@ These functions allow for convenient retrieval of paginated data.
 .. automethod:: Mastodon.fetch_previous
 .. _fetch_remaining():    
 .. automethod:: Mastodon.fetch_remaining
-
+.. _pagination_iterator():
+.. automethod:: Mastodon.pagination_iterator
+.. _get_pagination_info():
+.. automethod:: Mastodon.get_pagination_info
+    
 Blurhash decoding
 -----------------
 This function allows for easy basic decoding of blurhash strings to images.
--- a/mastodon/utility.py
+++ b/mastodon/utility.py
@ -12,11 +12,13 @@ from mastodon.internals import Mastodon as Internals

 from mastodon.versions import parse_version_string, max_version, api_version

-from typing import Optional, Union, Dict
+from typing import Optional, Union, Dict, Iterator
 from mastodon.return_types import PaginatableList, PaginationInfo, PaginatableList
-from mastodon.types_base import Entity
+from mastodon.types_base import Entity, try_cast

 # Class level:
+
+
 class Mastodon(Internals):
    def set_language(self, lang):
        """
@ -32,23 +34,26 @@ class Mastodon(Internals):
        Returns the version string, possibly including rc info.
        """
        try:
-            version_str = self.__normalize_version_string(self.__instance()["version"])
+            version_str = self.__normalize_version_string(
+                self.__instance()["version"])
            self.__version_check_worked = True
        except Exception as e:
            # instance() was added in 1.1.0, so our best guess is 1.0.0.
            version_str = "1.0.0"
            self.__version_check_worked = False
-        self.mastodon_major, self.mastodon_minor, self.mastodon_patch = parse_version_string(version_str)
+        self.mastodon_major, self.mastodon_minor, self.mastodon_patch = parse_version_string(
+            version_str)

        # If the instance has an API version, we store that as well.
        # If we have a version >= 4.3.0 but no API version, we throw a warning that this is a Weird Implementation,
        # which might help with adoption of the API versioning or at least give us a better picture of how it is going.
        found_api_version = False
        try:
-            instance_v2_info = self.instance_v2() 
+            instance_v2_info = self.instance_v2()
            if "api_versions" in instance_v2_info:
                if "mastodon" in instance_v2_info["api_versions"]:
-                    self.mastodon_api_version = int(instance_v2_info["api_versions"]["mastodon"])
+                    self.mastodon_api_version = int(
+                        instance_v2_info["api_versions"]["mastodon"])
                    found_api_version = True
        except MastodonNotFoundError:
            pass
@ -56,7 +61,8 @@ class Mastodon(Internals):
            pass
        self.__version_check_tried = True
        if not found_api_version and self.verify_minimum_version("4.3.0", cached=True):
-            warnings.warn("Mastodon version is detected as >= 4.3.0, but no API version found. Please report this.")
+            warnings.warn(
+                "Mastodon version is detected as >= 4.3.0, but no API version found. Please report this.")
        return version_str

    def verify_minimum_version(self, version_str, cached=False):
@ -86,7 +92,8 @@ class Mastodon(Internals):
        """
        response = self.__api_request("HEAD", "/", return_response_object=True)
        if 'Date' in response.headers:
-            server_time_datetime = dateutil.parser.parse(response.headers['Date'])
+            server_time_datetime = dateutil.parser.parse(
+                response.headers['Date'])

            # Make sure we're in local time
            epoch_time = self.__datetime_to_epoch(server_time_datetime)
@ -119,7 +126,8 @@ class Mastodon(Internals):
                'To use the blurhash functions, please install the blurhash Python module.')

        # Figure out what size to decode to
-        decode_components_x, decode_components_y = blurhash.components(media_dict["blurhash"])
+        decode_components_x, decode_components_y = blurhash.components(
+            media_dict["blurhash"])
        if size_per_component:
            decode_size_x = decode_components_x * out_size[0]
            decode_size_y = decode_components_y * out_size[1]
@ -128,7 +136,8 @@ class Mastodon(Internals):
            decode_size_y = out_size[1]

        # Decode
-        decoded_image = blurhash.decode(media_dict["blurhash"], decode_size_x, decode_size_y, linear=return_linear)
+        decoded_image = blurhash.decode(
+            media_dict["blurhash"], decode_size_x, decode_size_y, linear=return_linear)

        # And that's pretty much it.
        return decoded_image
@ -136,7 +145,7 @@ class Mastodon(Internals):
    ###
    # Pagination
    ###
-    def fetch_next(self, previous_page: Union[PaginatableList[Entity], Entity, Dict]) -> Optional[Union[PaginatableList[Entity], Entity]]:
+    def fetch_next(self, previous_page: Union[PaginatableList[Entity], Entity, PaginationInfo]) -> Optional[Union[PaginatableList[Entity], Entity]]:
        """
        Fetches the next page of results of a paginated request. Pass in the
        previous page in its entirety, or the pagination information dict
@ -161,7 +170,8 @@ class Mastodon(Internals):
                is_pagination_dict = True

        if not "_pagination_method" in params and not "_pagination_endpoint" in params:
-            raise MastodonIllegalArgumentError("The passed object is not paginatable")
+            raise MastodonIllegalArgumentError(
+                "The passed object is not paginatable")

        method = params['_pagination_method']
        del params['_pagination_method']
@ -183,7 +193,7 @@ class Mastodon(Internals):
        else:
            return self.__api_request(method, endpoint, params, override_type=response_type)

-    def fetch_previous(self, next_page: Union[PaginatableList[Entity], Entity, Dict]) -> Optional[Union[PaginatableList[Entity], Entity]]:
+    def fetch_previous(self, next_page: Union[PaginatableList[Entity], Entity, PaginationInfo]) -> Optional[Union[PaginatableList[Entity], Entity]]:
        """
        Fetches the previous page of results of a paginated request. Pass in the
        previous page in its entirety, or the pagination information dict
@ -208,8 +218,9 @@ class Mastodon(Internals):
                is_pagination_dict = True

        if not "_pagination_method" in params and not "_pagination_endpoint" in params:
-            raise MastodonIllegalArgumentError("The passed object is not paginatable")
-        
+            raise MastodonIllegalArgumentError(
+                "The passed object is not paginatable")
+
        method = params['_pagination_method']
        del params['_pagination_method']

@ -224,7 +235,7 @@ class Mastodon(Internals):
        force_pagination = False
        if not isinstance(next_page, list):
            force_pagination = True
-        
+
        if not is_pagination_dict:
            return self.__api_request(method, endpoint, params, force_pagination=force_pagination, override_type=response_type)
        else:
@ -239,8 +250,9 @@ class Mastodon(Internals):
        Be careful, as this might generate a lot of requests, depending on what you are
        fetching, and might cause you to run into rate limits very quickly.

-        Does not currently work with grouped notifications, please deal with those
-        yourself, for now.
+        Does not work with grouped notifications, since they use a somewhat weird, inside-out
+        pagination scheme. If you need to access these in a paginated way, use fetch_next and fetch_previous
+        directly.
        """
        first_page = copy.deepcopy(first_page)

@ -251,3 +263,60 @@ class Mastodon(Internals):
            current_page = self.fetch_next(current_page)

        return all_pages
+
+    def get_pagination_info(self, page: PaginatableList[Entity], pagination_direction: str) -> Optional[PaginationInfo]:
+        """
+        Extracts pagination information from a paginated response.
+
+        Returns a PaginationInfo dictionary containing pagination information, or None if not available.
+
+        The resulting PaginationInfo is best treated as opaque, though is unlikely to change.
+        """
+        if hasattr(page, "_pagination_next") and pagination_direction == "next":
+            return try_cast(PaginationInfo, page._pagination_next)
+        elif hasattr(page, "_pagination_prev") and pagination_direction == "previous":
+            return try_cast(PaginationInfo, page._pagination_prev)
+        else:
+            return None
+
+    def pagination_iterator(self, start_page: Union[PaginatableList[Entity], PaginationInfo], direction: str = "next", return_pagination_info: bool = False) -> Iterator[Entity]:
+        """
+        Returns an iterator that will yield all entries in a paginated request,
+        starting from the given start_page (can also be just the PaginationInfo, in which case the
+        first returned thing will be the result of fetch_next or fetch_previous, depending on the direction).
+        and fetching new pages as needed, and breaks when no more pages are available.
+
+        Set direction to "next" to iterate forward, or "previous" to iterate backwards.
+
+        If return_pagination_info is True, the iterator will instead yield tuples of (Entity, PaginationInfo),
+        where PaginationInfo is a dictionary containing pagination information for the current page and direction.
+
+        Does not work with grouped notifications, since they use a somewhat weird, inside-out
+        pagination scheme. If you need to access these in a paginated way, use fetch_next and fetch_previous
+        directly.
+        """
+        if direction not in ["next", "previous"]:
+            raise MastodonIllegalArgumentError(
+                "Invalid pagination direction: {}".format(direction))
+        
+        # Don't rely on python type info here, this is a Danger Zone. Instead, check for
+        # _pagination_endpoint
+        if hasattr(start_page, "_pagination_endpoint") or (isinstance(start_page, dict) and '_pagination_endpoint' in start_page):
+            current_page = self.fetch_next(
+                start_page) if direction == "next" else self.fetch_previous(start_page)
+        else:
+            current_page = start_page
+
+        while current_page is not None and len(current_page) > 0:
+            for entry in current_page:
+                if return_pagination_info:
+                    yield (entry, self.get_pagination_info(current_page, direction))
+                else:
+                    print("CURRENT PAGE IS", current_page)
+                    print("YIELDING ENTRY: ", entry)
+                    yield entry
+
+            if direction == "next":
+                current_page = self.fetch_next(current_page)
+            else:
+                current_page = self.fetch_previous(current_page)
--- a/tests/cassettes/test_get_pagination_info.yaml
+++ b/tests/cassettes/test_get_pagination_info.yaml
--- a/tests/cassettes/test_pagination_iterator.yaml
+++ b/tests/cassettes/test_pagination_iterator.yaml
--- a/tests/test_pagination.py
+++ b/tests/test_pagination.py
@ -11,7 +11,7 @@ import requests_mock

 UNLIKELY_HASHTAG = "fgiztsshwiaqqiztpmmjbtvmescsculuvmgjgopwoeidbcrixp"

-from mastodon.types_base import Entity
+from mastodon.types_base import Entity, PaginationInfo

@contextmanager
 def many_statuses(api, n=10, suffix=''):
@ -119,3 +119,51 @@ def test_link_headers(api):
    resp = api.timeline_hashtag(UNLIKELY_HASHTAG)
    assert resp._pagination_next['max_id'] == _id
    assert resp._pagination_prev['since_id'] == _id
+
+@pytest.mark.vcr()
+def test_get_pagination_info(api):
+    account = api.account_verify_credentials()
+    with many_statuses(api):
+        statuses = api.account_statuses(account['id'], limit=5)
+        pagination_info = api.get_pagination_info(statuses, "next")
+        assert pagination_info
+        assert pagination_info['max_id'] == statuses._pagination_next['max_id']
+        assert isinstance(pagination_info, PaginationInfo)
+        pagination_info = api.get_pagination_info(statuses, "previous")
+        assert pagination_info
+        assert pagination_info['min_id'] == statuses._pagination_prev['min_id']
+        assert isinstance(pagination_info, PaginationInfo)
+    empty_dict = {}
+    assert api.get_pagination_info(empty_dict, "next") is None
+
+@pytest.mark.vcr()
+def test_pagination_iterator(api3):
+    with many_statuses(api3, n=30, suffix=' #'+UNLIKELY_HASHTAG):
+        hashtag = api3.timeline_hashtag(UNLIKELY_HASHTAG, limit=10)
+        iterator = api3.pagination_iterator(hashtag, "next")
+        assert iterator
+        for status in iterator:
+            print(status)
+            assert UNLIKELY_HASHTAG in status['content']
+            assert type(status) == type(hashtag[0])
+        iterator = api3.pagination_iterator(hashtag._pagination_prev, "previous")
+        assert iterator
+        for status in iterator:
+            print(status)
+            assert UNLIKELY_HASHTAG in status['content']
+            assert type(status) == type(hashtag[0])
+
+        # Test with pagination info    
+        pagination_info = hashtag._pagination_next
+        iterator = api3.pagination_iterator(pagination_info, "next")
+        assert iterator
+        for status in iterator:
+            assert UNLIKELY_HASHTAG in status['content']
+            assert type(status) == type(hashtag[0])
+        pagination_info = hashtag._pagination_prev
+        iterator = api3.pagination_iterator(pagination_info, "previous")
+        assert iterator
+        for status in iterator:
+            assert UNLIKELY_HASHTAG in status['content']
+            assert type(status) == type(hashtag[0])
+