Choropleth example¶

This is an example of creating choropleth maps using Altair.

import csv
import json
from pathlib import Path

import altair as alt
from vega_datasets import data as vega_data

from pyinaturalist import iNatClient

# Change to any species or state
TAXON_NAME = 'Buteo jamaicensis'  # Red-tailed hawk
STATE_ABBREV = 'IA'

taxon_slug = TAXON_NAME.replace(' ', '_').lower()
SAMPLE_DATA_DIR = Path('.').parent / 'sample_data'
COUNTY_CACHE_FILE = Path(f'{taxon_slug}_{STATE_ABBREV}_counties.json')
COUNTY_ID_LOOKUP = SAMPLE_DATA_DIR / 'us_county_place_ids.csv'

STATE_CACHE_FILE = Path(f'{taxon_slug}_us_states.json')
STATE_ID_LOOKUP = SAMPLE_DATA_DIR / 'us_state_place_ids.csv'
STATE_POPULATION_LOOKUP = SAMPLE_DATA_DIR / 'us_state_populations.csv'
EXCLUDED_STATE_FIPS = {2, 15}  # Exclude AK and HI for contiguous US map

client = iNatClient()

County-level map¶

We’ll start by visualizing observations per county within a given state.

Sample data is included that maps US county FIPS codes to iNaturalist place IDs. We’ll start by loading that and then fetching observation counts per county:

def load_county_lookup(state_abbrev: str) -> dict[str, int]:
    """Return {5-digit-FIPS-string: inat_place_id} for counties in the given state."""
    with open(COUNTY_ID_LOOKUP, newline='') as f:
        return {
            row['fips_code'].zfill(5): int(row['inat_place_id'])
            for row in csv.DictReader(f)
            if row['state_abbr'] == state_abbrev
        }


def fetch_county_counts(fips_to_inat: dict[str, int]) -> dict[str, int]:
    """Fetch observation counts per county, returning {fips: count}."""
    counts: dict[str, int] = {}
    total = len(fips_to_inat)
    for i, (fips, place_id) in enumerate(fips_to_inat.items(), start=1):
        print(f'  County {i}/{total} (FIPS {fips}, place_id {place_id})')
        count = client.observations.search(
            taxon_name=TAXON_NAME,
            place_id=place_id,
            verifiable=True,
        ).count()
        counts[fips] = count
    return counts


def load_or_fetch_county_counts(state_abbrev: str) -> dict[str, int]:
    """Load county counts from cache if available, otherwise fetch from API."""
    if COUNTY_CACHE_FILE.exists():
        print(f'Loading county counts from cache: {COUNTY_CACHE_FILE}')
        with open(COUNTY_CACHE_FILE) as f:
            return json.load(f)

    print(f'Fetching county observation counts for {state_abbrev}...')
    fips_to_inat = load_county_lookup(state_abbrev)
    counts = fetch_county_counts(fips_to_inat)

    with open(COUNTY_CACHE_FILE, 'w') as f:
        json.dump(counts, f)
    print(f'Cached to {COUNTY_CACHE_FILE}')
    return counts


counts = load_or_fetch_county_counts(STATE_ABBREV)

Loading county counts from cache: buteo_jamaicensis_IA_counties.json

Then we can map these counts over the vega-data US 1m dataset.

Reference docs:

# Convert counts to a list of records for Altair
with open(COUNTY_ID_LOOKUP, newline='') as f:
    fips_to_name = {
        row['fips_code'].zfill(5): row['county_name']
        for row in csv.DictReader(f)
        if row['state_abbr'] == STATE_ABBREV
    }

count_records = [
    {'fips': int(fips), 'count': count, 'name': fips_to_name.get(fips, fips)}
    for fips, count in counts.items()
]
counties_topo = alt.topo_feature(vega_data.us_10m.url, 'counties')
state_fips_prefix = {fips[:2] for fips in counts}
state_fips_int = int(next(iter(state_fips_prefix)))

chart = (
    alt.Chart(counties_topo)
    .mark_geoshape()
    .transform_filter(f'floor(datum.id / 1000) == {state_fips_int}')
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(
            data=alt.InlineData(values=count_records),
            key='fips',
            fields=['count', 'name'],
        ),
    )
    .encode(
        color=alt.Color(
            'count:Q',
            scale=alt.Scale(scheme='viridis'),
            legend=alt.Legend(title='Observations'),
        ),
        tooltip=[
            alt.Tooltip('name:N', title='County'),
            alt.Tooltip('count:Q', title='Observations'),
        ],
    )
    .project('albersUsa')
    .properties(
        title=f'{TAXON_NAME} observations by county — {STATE_ABBREV}',
        width=700,
        height=500,
    )
)

# Optionally export to HTML:
# chart.save(f'{STATE_ABBREV}_counties.html')

chart

State-level map¶

Next, we will visualize observations per state within the US.

Note: Using the same tools, you can map county-level data for the whole country, but since that requires 3000+ API calls, we’ll just stick to state-level data for the purposes of this demo.

Sample data is included that maps US state FIPS codes to iNaturalist place IDs. We’ll start by loading that and then fetching observation counts per state:

def load_state_lookup() -> dict[int, int]:
    """Return {state_fips: inat_place_id} for the 48 contiguous US states."""
    with open(STATE_ID_LOOKUP, newline='') as f:
        return {
            int(row['state_fips']): int(row['inat_place_id'])
            for row in csv.DictReader(f)
            if int(row['state_fips']) not in EXCLUDED_STATE_FIPS
        }


def fetch_state_counts(state_map: dict[int, int]) -> dict[int, int]:
    """Fetch observation counts per state, returning {state_fips: count}."""
    counts: dict[int, int] = {}
    total = len(state_map)
    for i, (fips, place_id) in enumerate(state_map.items(), start=1):
        print(f'  State {i}/{total} (FIPS {fips}, place_id {place_id})')
        count = client.observations.search(
            taxon_name=TAXON_NAME,
            place_id=place_id,
            verifiable=True,
        ).count()
        counts[fips] = count
    return counts


def load_or_fetch_state_counts() -> dict[int, int]:
    """Load state counts from cache if available, otherwise fetch from API."""
    if STATE_CACHE_FILE.exists():
        print(f'Loading state counts from cache: {STATE_CACHE_FILE}')
        with open(STATE_CACHE_FILE) as f:
            # JSON keys are strings; convert back to int
            return {int(k): v for k, v in json.load(f).items()}

    print('Fetching state-level observation counts for contiguous US...')
    state_map = load_state_lookup()
    counts = fetch_state_counts(state_map)

    with open(STATE_CACHE_FILE, 'w') as f:
        json.dump(counts, f)
    print(f'Cached to {STATE_CACHE_FILE}')
    return counts


counts = load_or_fetch_state_counts()

Loading state counts from cache: buteo_jamaicensis_us_states.json

with open(STATE_POPULATION_LOOKUP, newline='') as f:
    fips_to_state_name = {int(row['state_fips']): row['state_name'] for row in csv.DictReader(f)}

count_records = [
    {'fips': fips, 'count': count, 'name': fips_to_state_name.get(fips, str(fips))}
    for fips, count in counts.items()
]
states_topo = alt.topo_feature(vega_data.us_10m.url, 'states')

chart = (
    alt.Chart(states_topo)
    .mark_geoshape()
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(
            data=alt.InlineData(values=count_records),
            key='fips',
            fields=['count', 'name'],
        ),
    )
    .encode(
        color=alt.Color(
            'count:Q',
            scale=alt.Scale(scheme='viridis'),
            legend=alt.Legend(title='Observations'),
        ),
        tooltip=[
            alt.Tooltip('name:N', title='State'),
            alt.Tooltip('count:Q', title='Observations'),
        ],
    )
    .project('albersUsa')
    .properties(
        title=f'{TAXON_NAME} observations by state — contiguous US',
        width=900,
        height=500,
    )
)


# Optionally export to HTML:
# chart.save('us_states.html')

chart

Normalized state-level map¶

Like most observation maps, the results are heavily correlated with observer density. Let’s normalize by state population to compensate for that.

Source: 2020 US Census apportionment data

# {state_fips: population} and {state_fips: state_name} for all 50 states
with open(STATE_POPULATION_LOOKUP, newline='') as f:
    pop_data = list(csv.DictReader(f))
    populations = {int(row['state_fips']): int(row['population']) for row in pop_data}
    fips_to_state_name = {int(row['state_fips']): row['state_name'] for row in pop_data}

count_records = [
    {
        'fips': fips,
        'per_100k': round(count / populations[fips] * 100_000, 2),
        'name': fips_to_state_name.get(fips, str(fips)),
    }
    for fips, count in counts.items()
    if fips in populations
]

states_topo = alt.topo_feature(vega_data.us_10m.url, 'states')

chart = (
    alt.Chart(states_topo)
    .mark_geoshape()
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(
            data=alt.InlineData(values=count_records),
            key='fips',
            fields=['per_100k', 'name'],
        ),
    )
    .encode(
        color=alt.Color(
            'per_100k:Q',
            scale=alt.Scale(scheme='viridis'),
            legend=alt.Legend(title='Obs. per 100k residents'),
        ),
        tooltip=[
            alt.Tooltip('name:N', title='State'),
            alt.Tooltip('per_100k:Q', title='Obs. per 100k residents'),
        ],
    )
    .project('albersUsa')
    .properties(
        title=f'{TAXON_NAME} observations per 100k residents by state',
        width=900,
        height=500,
    )
)

chart