Examples

Below are some examples of building data visualizations and other neat things using iNaturalist data. These can also be found in the examples/ folder on GitHub.

Notebooks

Example Jupyter notebooks. Click the badge below to try them out in your browser using Binder:

https://mybinder.org/badge_logo.svg

This uses the pyinaturalist-notebook Docker image, which you can also use to run these examples on a local Jupyter server.

Scripts

Convert observations to GPX

An example of converting observation locations + metadata into GPX format.

Extra dependencies:

pip install gpxpy

Example code
#!/usr/bin/env python3

from logging import getLogger

from gpxpy.gpx import GPX, GPXTrack, GPXTrackPoint, GPXTrackSegment, GPXWaypoint

from pyinaturalist import Observation, iNatClient
from pyinaturalist.constants import List

# Module-level logger, named after this module
logger = getLogger(__name__)


def observations_to_gpx(
    observations: List[Observation], output_file: str = 'observations.gpx', track: bool = True
):
    """Convert a list of observations to a set of GPX waypoints or a GPX track

    Observations that cannot be converted to a point (i.e., those without a location)
    are left out of the output.

    Args:
        observations: List of Observation objects
        output_file: File path to write to
        track: Create an ordered GPX track; otherwise, create unordered GPX waypoints
    """
    gpx = GPX()
    logger.info(f'Converting {len(observations)} to GPX points')
    points = [observation_to_gpx_point(obs, track=track) for obs in observations]
    # Filter out any None points (observations without location)
    points = [point for point in points if point is not None]

    if track:
        # A track is a container of segments, and a segment holds the ordered points
        gpx_track = GPXTrack()
        gpx.tracks.append(gpx_track)
        gpx_segment = GPXTrackSegment()
        gpx_track.segments.append(gpx_segment)
        gpx_segment.points = points
    else:
        gpx.waypoints = points

    # Save to file. Write explicitly as UTF-8 so that non-ASCII text (descriptions, names)
    # is not mangled by the platform's default encoding.
    logger.info(f'Writing GPX data to {output_file}')
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(gpx.to_xml())


def observation_to_gpx_point(observation: Observation, track: bool = True):
    """Build a single GPX point from an observation

    Args:
        observation: Observation object
        track: If true, build a point belonging to an ordered GPX track;
            otherwise, build an unordered waypoint

    Returns:
        A GPX track point or waypoint, or ``None`` if the observation has no location
    """
    logger.debug(f'Processing observation {observation.id}')
    # Nothing to plot without coordinates
    if not observation.location:
        logger.warning(f'Observation {observation.id} has no location, skipping')
        return None

    latitude, longitude = observation.location

    # Link to the first photo (medium size preferred) when there is one,
    # and fall back to the observation URL otherwise
    photos = observation.photos
    if photos:
        first_photo = photos[0]
        link = first_photo.medium_url or first_photo.url
    else:
        link = observation.uri

    make_point = GPXWaypoint if not track else GPXTrackPoint
    gpx_point = make_point(
        latitude=latitude,
        longitude=longitude,
        time=observation.observed_on,
        comment=str(observation),
    )
    gpx_point.description = observation.description
    gpx_point.link = link
    gpx_point.link_text = f'Observation {observation.id}'
    return gpx_point


if __name__ == '__main__':
    # All API requests go through a client object
    client = iNatClient()

    # Search filters: georeferenced, publicly located observations from one project,
    # created within a date range (adjust the dates as needed)
    search_params = dict(
        project_id=36883,  # ID of the 'Sugarloaf Ridge State Park' project
        created_d1='2020-01-01',  # From January 2020...
        created_d2='2020-09-30',  # ...through September 2020
        geo=True,  # Only observations with geospatial coordinates
        geoprivacy='open',  # Only public coordinates (not obscured/private)
    )
    results = client.observations.search(**search_params).all()

    # Convert the search results and write them to a GPX file
    observations_to_gpx(results)

Observation photo metadata

An example of getting metadata for observation photos. This metadata is currently not available through the API, so it must be obtained by web scraping.

Note that web scraping in general is not very reliable and is prone to breakage, so this script may or may not work without modification. See examples/sample_data/photo_info.html for an example of photo info HTML at time of writing.

Also note that photo metadata is only visible to logged-in users, so an access token is required. For more details on authentication, see the authentication section of the pyinaturalist documentation.

Extra dependencies:

pip install beautifulsoup4

Example code
#!/usr/bin/env python3
"""Example of scraping photo metadata from the photo info pages of an observation"""

from pprint import pprint

import requests
from bs4 import BeautifulSoup

from pyinaturalist import get_access_token, iNatClient

# Row labels from the photo info table that are left out of the scraped metadata
IGNORE_ATTRIBUTES = ['Associated observations', 'Sizes']
# Base URL of photo info pages; a photo ID is appended to get a specific page
PHOTO_INFO_BASE_URL = 'https://www.inaturalist.org/photos'


def get_photo_metadata(photo_url, access_token):
    """Scrape content from a photo info URL, and attempt to get its metadata

    Args:
        photo_url: URL of a photo info page
        access_token: Access token, sent as a bearer token in the ``Authorization`` header

    Returns:
        Dict of labels and values taken from the second table inside the page's ``wrapper``
        element. Rows with an empty value, or with a label listed in ``IGNORE_ATTRIBUTES``,
        are omitted.

    Raises:
        requests.HTTPError: If the server responds with an error status (4xx/5xx)
    """
    print(f'Fetching {photo_url}')
    photo_page = requests.get(photo_url, headers={'Authorization': f'Bearer {access_token}'})
    # Surface a failed request as an HTTP error, rather than as an obscure parsing error below
    photo_page.raise_for_status()
    soup = BeautifulSoup(photo_page.content, 'html.parser')
    table = soup.find(id='wrapper').find_all('table')[1]

    metadata = {}
    for row in table.find_all('tr'):
        header, cell = row.find('th'), row.find('td')
        # Skip rows that are not a label/value pair; find() returns None when a tag is absent
        if header is None or cell is None:
            continue
        key = header.text.strip()
        value = cell.text.strip()
        if value and key not in IGNORE_ATTRIBUTES:
            metadata[key] = value
    return metadata


def get_observation_photo_metadata(observation_id, access_token):
    """Look up an observation, and attempt to scrape metadata from each of its photo info pages

    Args:
        observation_id: ID of the observation to fetch
        access_token: Access token passed on to each photo info page request

    Returns:
        List with one metadata dict per photo
    """
    print(f'Fetching observation {observation_id}')
    observation = iNatClient().observations(observation_id)

    # Each photo has its own info page, addressed by photo ID
    info_urls = [f'{PHOTO_INFO_BASE_URL}/{photo.id}' for photo in observation.photos]
    print(f'{len(info_urls)} photo URL(s) found')

    all_metadata = []
    for info_url in info_urls:
        all_metadata.append(get_photo_metadata(info_url, access_token))
    return all_metadata


# !! Replace values here !!
if __name__ == '__main__':
    observation_id = 99
    # Account and application credentials used to request an access token
    credentials = {
        'username': '',
        'password': '',
        'app_id': '',
        'app_secret': '',
    }
    access_token = get_access_token(**credentials)

    # Scrape and display metadata for every photo on the observation
    all_metadata = get_observation_photo_metadata(observation_id, access_token)
    pprint(all_metadata, indent=4)