Examples

Below are some examples of building data visualizations and other neat things using iNaturalist data. These can also be found in the examples/ folder on GitHub.

Scripts

Convert observations to GPX

An example of converting observation locations + metadata into GPX format.

Extra dependencies:

pip install gpxpy

Example code

#!/usr/bin/env python3
from logging import getLogger

from gpxpy.gpx import GPX, GPXTrack, GPXTrackPoint, GPXTrackSegment, GPXWaypoint

from pyinaturalist.constants import JsonResponse, List
from pyinaturalist.formatters import format_observations
from pyinaturalist.node_api import get_all_observations, get_observations  # noqa
from pyinaturalist.response_format import convert_observation_timestamps

logger = getLogger(__name__)


def observations_to_gpx(
    observations: List[JsonResponse], output_file: str = "observations.gpx", track: bool = True
):
    """Convert a list of observations to a set of GPX waypoints or a GPX track

    Args:
        observations: JSON observations
        output_file: File path to write to
        track: Create an ordered GPX track; otherwise, create unordered GPX waypoints
    """
    gpx = GPX()
    logger.info(f"Converting {len(observations)} observations to GPX points")
    points = [observation_to_gpx_point(obs, track=track) for obs in observations]

    if track:
        # A GPX track is a list of segments; put all ordered points in one segment
        gpx_track = GPXTrack()
        gpx.tracks.append(gpx_track)
        gpx_segment = GPXTrackSegment()
        gpx_track.segments.append(gpx_segment)
        gpx_segment.points = points
    else:
        gpx.waypoints = points

    # Save to file
    logger.info(f"Writing GPX data to {output_file}")
    with open(output_file, "w") as f:
        f.write(gpx.to_xml())


def observation_to_gpx_point(observation: JsonResponse, track: bool = True):
    """Convert a single observation into a GPX point

    Args:
        observation: JSON observation
        track: Indicates that this point is part of an ordered GPX track;
            otherwise, assume it is an unordered waypoint
    """
    logger.debug(f'Processing observation {observation["id"]}')

    # GeoJSON coordinates are ordered as `longitude, latitude`
    longitude, latitude = observation["geojson"]["coordinates"]

    # Prefer a medium-sized photo URL, if available; otherwise fall back to the observation URL
    photos = observation["photos"]
    link = photos[0]["url"].replace("square", "medium") if photos else observation["uri"]

    # Track points and waypoints share the same constructor arguments
    point_type = GPXTrackPoint if track else GPXWaypoint
    point = point_type(
        latitude=latitude,
        longitude=longitude,
        time=convert_observation_timestamps(observation),
        comment=format_observations(observation),
    )
    point.description = observation["description"]
    point.link = link
    point.link_text = f'Observation {observation["id"]}'
    return point


if __name__ == "__main__":
    # Get first page of search results (for testing)
    search_params = {
        "project_id": 36883,  # ID of the 'Sugarloaf Ridge State Park' project
        "created_d1": "2020-01-01",  # Get observations from January 2020...
        "created_d2": "2020-09-30",  # ...through September 2020 (adjust these dates as needed)
        "geo": True,  # Only get observations with geospatial coordinates
        "geoprivacy": "open",  # Only get observations with public coordinates (not obscured/private)
    }
    results = get_observations(**search_params)["results"]

    # Paginate through all search results (may take a long time for a large query)
    # results = get_all_observations(**search_params)

    # Convert and write to GPX file
    observations_to_gpx(results)

Observation photo metadata

An example of getting photo metadata from observation photos, which is currently not available in the API and must be done by web scraping.

Note that web scraping in general is not very reliable and is prone to breakage, so this script may or may not work without modification. See examples/sample_data/photo_info.html for an example of photo info HTML at time of writing.

Also note that photo metadata is only visible to logged-in users, so an access token is required. For more details, see the pyinaturalist authentication documentation.

Extra dependencies:

pip install beautifulsoup4

Example code

#!/usr/bin/env python3
from pprint import pprint

import requests
from bs4 import BeautifulSoup

from pyinaturalist.node_api import get_observation
from pyinaturalist.rest_api import get_access_token

# Metadata table rows to skip when scraping a photo info page
IGNORE_ATTRIBUTES = ['Associated observations', 'Sizes']
# Base URL for iNaturalist photo info pages (photo ID is appended)
PHOTO_INFO_BASE_URL = 'https://www.inaturalist.org/photos'


def get_photo_metadata(photo_url, access_token, timeout=10):
    """Scrape content from a photo info URL, and attempt to get its metadata

    Args:
        photo_url: Full URL of the photo info page to scrape
        access_token: Access token for a logged-in user (metadata is not publicly visible)
        timeout: Request timeout, in seconds

    Returns:
        Dict of photo metadata attributes scraped from the page
    """
    print(f'Fetching {photo_url}')
    # Set a timeout so a stalled connection can't hang the script indefinitely
    photo_page = requests.get(
        photo_url, headers={'Authorization': f'Bearer {access_token}'}, timeout=timeout
    )
    # Fail fast on HTTP errors (e.g. bad/expired token) instead of failing later during parsing
    photo_page.raise_for_status()
    soup = BeautifulSoup(photo_page.content, 'html.parser')
    # NOTE: The metadata table is the second table on the page at time of writing;
    # this is fragile and may break if the page layout changes
    table = soup.find(id='wrapper').find_all('table')[1]

    metadata = {}
    for row in table.find_all('tr'):
        key = row.find('th').text.strip()
        value = row.find('td').text.strip()
        if value and key not in IGNORE_ATTRIBUTES:
            metadata[key] = value
    return metadata


def get_observation_photo_metadata(observation_id, access_token):
    """Attempt to scrape metadata from all photo info pages associated with an observation

    Args:
        observation_id: ID of the observation to look up photos for
        access_token: Access token for a logged-in user

    Returns:
        List of metadata dicts, one per photo
    """
    print(f'Fetching observation {observation_id}')
    obs = get_observation(observation_id)
    photo_ids = [photo['id'] for photo in obs.get('photos', [])]
    # Use `photo_id` instead of `id`, which shadows the builtin
    photo_urls = [f'{PHOTO_INFO_BASE_URL}/{photo_id}' for photo_id in photo_ids]
    print(f'{len(photo_urls)} photo URL(s) found')
    return [get_photo_metadata(url, access_token) for url in photo_urls]


# !! Replace values here !!
if __name__ == '__main__':
    # Example observation ID; replace with one of your own
    observation_id = 99
    # Fill in your iNaturalist credentials and app keys to get an access token
    access_token = get_access_token(username='', password='', app_id='', app_secret='')

    all_metadata = get_observation_photo_metadata(observation_id, access_token)
    pprint(all_metadata, indent=4)