Source code for pyinaturalist.pagination

from functools import wraps
from logging import getLogger
from math import ceil
from time import sleep
from typing import Callable

from pyinaturalist.constants import (
    EXPORT_URL,
    LARGE_REQUEST_WARNING,
    PER_PAGE_RESULTS,
    REQUESTS_PER_MINUTE,
    THROTTLING_DELAY,
    JsonResponse,
)

logger = getLogger(__name__)


def add_paginate_all(method: str = 'page'):
    """Decorator that adds auto-pagination support, invoked by passing ``page='all'`` to the
    wrapped API function.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **params):
            if params.get('page') == 'all':
                return paginate_all(func, *args, method=method, **params)
            return func(*args, **params)

        return wrapper

    return decorator
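
# Illustrative sketch (hypothetical, not part of the original module): a minimal fake endpoint
# demonstrating the decorator. ``get_fake_records`` and its canned pages stand in for a real
# API function; nothing here hits the network.
@add_paginate_all(method='page')
def get_fake_records(**params) -> JsonResponse:
    pages = {1: [{'id': 1}, {'id': 2}], 2: [{'id': 3}]}
    return {'results': pages.get(params.get('page'), []), 'total_results': 3}

# get_fake_records(page='all')
# -> {'results': [{'id': 1}, {'id': 2}, {'id': 3}], 'total_results': 3}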
def paginate_all(api_func: Callable, *args, method: str = 'page', **params) -> JsonResponse:
    """Get all pages of a multi-page request. Explicit pagination parameters will be overridden.

    Args:
        api_func: API endpoint function to paginate
        method: Pagination method; either 'page', 'id', or 'autocomplete' (see below)
        params: Original request parameters

    Note on pagination by ID, from the iNaturalist documentation:

        'The large size of the observations index prevents us from supporting the page parameter
        when retrieving records from large result sets. If you need to retrieve large numbers of
        records, use the ``per_page`` and ``id_above`` or ``id_below`` parameters instead.'

    Returns:
        Response dict containing combined results, in the same format as ``api_func``
    """
    params.pop('page', None)
    if method == 'autocomplete':
        return paginate_autocomplete(api_func, *args, **params)
    if method == 'id':
        params['order_by'] = 'id'
        params['order'] = 'asc'
    else:
        params['page'] = 1
    params['per_page'] = PER_PAGE_RESULTS

    # Run an initial request to get request size
    response = api_func(**params)
    results = page_results = response['results']
    total_results = response.get('total_results')
    estimate_request_size(total_results)

    # Some endpoints (like get_observation_fields) don't return total_results for some reason.
    # Also check page size, in case total_results is off (race condition, outdated index, etc.)
    def check_results():
        more_results = total_results is None or len(results) < total_results
        return more_results and len(page_results) > 0

    # Loop until we get all pages
    while check_results():
        if method == 'id':
            params['id_above'] = page_results[-1]['id']
        else:
            params['page'] += 1
        page_results = api_func(**params).get('results', [])
        results += page_results
        sleep(THROTTLING_DELAY)

    return {
        'results': results,
        'total_results': len(results),
    }
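
# For reference, typical end-user usage of the above (assuming pyinaturalist's
# ``get_observations``, which is wrapped with ``add_paginate_all``; any other decorated
# endpoint works the same way):
#
#   from pyinaturalist import get_observations
#   response = get_observations(taxon_id=3, quality_grade='research', page='all')
#   # All pages are fetched PER_PAGE_RESULTS at a time, sleeping THROTTLING_DELAY
#   # seconds between requests, then combined into a single response dict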
def paginate_autocomplete(api_func: Callable, *args, **params) -> JsonResponse:
    """Attempt to get as many results as possible from the places autocomplete endpoint.
    This is necessary for some problematic places that have many matches, but are not ranked
    with the desired match(es) first.

    This works based on different rankings being returned for ``order_by=area``. No other
    fields can be sorted on, and direction can't be specified, but this can at least provide
    a few additional results beyond the limit of 20.
    """
    params['per_page'] = 20
    params.pop('order_by', None)

    # Search with default ordering, and ordering by area (if there are more than 20 results)
    page_1 = api_func(*args, **params)
    if page_1['total_results'] > 20:
        page_2 = api_func(*args, **params, order_by='area')
    else:
        page_2 = {'results': []}

    # De-duplicate results
    unique_results = {r['id']: r for page in [page_1, page_2] for r in page['results']}
    return {
        'results': list(unique_results.values()),
        'total_results': page_1['total_results'],
    }
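
# Illustrative usage (assuming pyinaturalist's ``get_places_autocomplete``, the endpoint
# this method was written for):
#
#   from pyinaturalist import get_places_autocomplete
#   response = paginate_autocomplete(get_places_autocomplete, q='springfield')
#   # Combines results from the default ordering and order_by='area', de-duplicated by
#   # record ID: up to 40 unique places instead of the endpoint's limit of 20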
def estimate_request_size(total_results):
    """Log the estimated total number of requests and rate-limiting delay, and show a warning
    if the request is too large
    """
    if not total_results:
        return
    total_requests = ceil(total_results / PER_PAGE_RESULTS)
    est_delay = ceil((total_requests / REQUESTS_PER_MINUTE) * 60)
    logger.info(
        f'This query will fetch {total_results} results in {total_requests} requests. '
        f'Estimated total rate-limiting delay: {est_delay} seconds'
    )
    if total_results > LARGE_REQUEST_WARNING:
        logger.warning(
            'This request is larger than recommended for API usage. For bulk requests, consider '
            f'using the iNat export tool instead: {EXPORT_URL}'
        )
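
# Worked example, assuming PER_PAGE_RESULTS=200 and REQUESTS_PER_MINUTE=60 (see
# constants.py for the actual values):
#
#   estimate_request_size(5000)
#   # total_requests = ceil(5000 / 200) = 25
#   # est_delay = ceil((25 / 60) * 60) = 25
#   # Logs: 'This query will fetch 5000 results in 25 requests. Estimated total
#   # rate-limiting delay: 25 seconds'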