{ "cells": [ { "cell_type": "markdown", "id": "6623b237-1e3a-4672-bf62-a1a395daec3b", "metadata": {}, "source": [ "# Tutorial 3: Data Visualizations\n", "[](https://mybinder.org/v2/gh/pyinat/pyinaturalist/main?filepath=examples%2FTutorial_3_Data_Visualizations.ipynb)\n", "\n", "This notebook will show you a few basic visualizations you can make with your own observation data.\n", "\n", "We'll do this with [Pandas](https://pandas.pydata.org/docs/getting_started/index.html) and [Altair](https://altair-viz.github.io). Don't worry if you're not familiar with those tools, this is just to demonstrate the kinds of things you can do with your data." ] }, { "cell_type": "code", "execution_count": 1, "id": "1a77766f-3096-49da-8860-c3686859f91e", "metadata": {}, "outputs": [], "source": [ "import altair as alt\n", "import pandas as pd\n", "\n", "from pyinaturalist import iNatClient, pprint\n", "\n", "# enable_logging()\n", "client = iNatClient()" ] }, { "cell_type": "markdown", "id": "32730c39-027a-4e89-9fd8-0feb284bfcf3", "metadata": {}, "source": [ "## Observation data\n", "We'll start with all of your own observation data:" ] }, { "cell_type": "code", "execution_count": 2, "id": "cde06918-2e13-41b6-9ce6-2f5be94551c5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n",
" ID Taxon ID Taxon Observed on User Location \n",
" ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ \n",
" 30688807 1415100 Cleomella serrulata (Rocky Mountain Beeplant) Aug 12, 2019 jkcook Johnston, IA, USA \n",
" 30688955 47912 Asclepias tuberosa (Butterfly Milkweed) Aug 12, 2019 jkcook Johnston, IA, USA \n",
" 30689111 60251 Verbena hastata (Blue Vervain) Aug 12, 2019 jkcook Johnston, IA, USA \n",
" 30689221 121968 Andropogon gerardi (Big Bluestem) Aug 12, 2019 jkcook Johnston, IA, USA \n",
" 30689306 121968 Andropogon gerardi (Big Bluestem) Aug 12, 2019 jkcook Johnston, IA, USA \n",
" \n",
"\n"
],
"text/plain": [
" \n",
" \u001b[1;37m \u001b[0m\u001b[1;37mID \u001b[0m\u001b[1;37m \u001b[0m \u001b[1;37m \u001b[0m\u001b[1;37mTaxon ID\u001b[0m\u001b[1;37m \u001b[0m \u001b[1;37m \u001b[0m\u001b[1;37mTaxon \u001b[0m\u001b[1;37m \u001b[0m \u001b[1;37m \u001b[0m\u001b[1;37mObserved on \u001b[0m\u001b[1;37m \u001b[0m \u001b[1;37m \u001b[0m\u001b[1;37mUser \u001b[0m\u001b[1;37m \u001b[0m \u001b[1;37m \u001b[0m\u001b[1;37mLocation \u001b[0m\u001b[1;37m \u001b[0m \n",
" ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ \n",
" \u001b[2;36m \u001b[0m\u001b[2;36m30688807\u001b[0m\u001b[2;36m \u001b[0m \u001b[2;36m \u001b[0m\u001b[2;36m1415100 \u001b[0m\u001b[2;36m \u001b[0m \u001b[2;32m \u001b[0m\u001b[2;3;32mCleomella serrulata\u001b[0m\u001b[2;32m (Rocky Mountain Beeplant)\u001b[0m\u001b[2;32m \u001b[0m \u001b[2;34m \u001b[0m\u001b[2;34mAug 12, 2019\u001b[0m\u001b[2;34m \u001b[0m \u001b[2;35m \u001b[0m\u001b[2;35mjkcook\u001b[0m\u001b[2;35m \u001b[0m \u001b[2;37m \u001b[0m\u001b[2;37mJohnston, IA, USA\u001b[0m\u001b[2;37m \u001b[0m \n",
" \u001b[36m \u001b[0m\u001b[36m30688955\u001b[0m\u001b[36m \u001b[0m \u001b[36m \u001b[0m\u001b[36m47912 \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[3;32mAsclepias tuberosa\u001b[0m\u001b[32m (Butterfly Milkweed) \u001b[0m\u001b[32m \u001b[0m \u001b[34m \u001b[0m\u001b[34mAug 12, 2019\u001b[0m\u001b[34m \u001b[0m \u001b[35m \u001b[0m\u001b[35mjkcook\u001b[0m\u001b[35m \u001b[0m \u001b[37m \u001b[0m\u001b[37mJohnston, IA, USA\u001b[0m\u001b[37m \u001b[0m \n",
" \u001b[2;36m \u001b[0m\u001b[2;36m30689111\u001b[0m\u001b[2;36m \u001b[0m \u001b[2;36m \u001b[0m\u001b[2;36m60251 \u001b[0m\u001b[2;36m \u001b[0m \u001b[2;32m \u001b[0m\u001b[2;3;32mVerbena hastata\u001b[0m\u001b[2;32m (Blue Vervain) \u001b[0m\u001b[2;32m \u001b[0m \u001b[2;34m \u001b[0m\u001b[2;34mAug 12, 2019\u001b[0m\u001b[2;34m \u001b[0m \u001b[2;35m \u001b[0m\u001b[2;35mjkcook\u001b[0m\u001b[2;35m \u001b[0m \u001b[2;37m \u001b[0m\u001b[2;37mJohnston, IA, USA\u001b[0m\u001b[2;37m \u001b[0m \n",
" \u001b[36m \u001b[0m\u001b[36m30689221\u001b[0m\u001b[36m \u001b[0m \u001b[36m \u001b[0m\u001b[36m121968 \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[3;32mAndropogon gerardi\u001b[0m\u001b[32m (Big Bluestem) \u001b[0m\u001b[32m \u001b[0m \u001b[34m \u001b[0m\u001b[34mAug 12, 2019\u001b[0m\u001b[34m \u001b[0m \u001b[35m \u001b[0m\u001b[35mjkcook\u001b[0m\u001b[35m \u001b[0m \u001b[37m \u001b[0m\u001b[37mJohnston, IA, USA\u001b[0m\u001b[37m \u001b[0m \n",
" \u001b[2;36m \u001b[0m\u001b[2;36m30689306\u001b[0m\u001b[2;36m \u001b[0m \u001b[2;36m \u001b[0m\u001b[2;36m121968 \u001b[0m\u001b[2;36m \u001b[0m \u001b[2;32m \u001b[0m\u001b[2;3;32mAndropogon gerardi\u001b[0m\u001b[2;32m (Big Bluestem) \u001b[0m\u001b[2;32m \u001b[0m \u001b[2;34m \u001b[0m\u001b[2;34mAug 12, 2019\u001b[0m\u001b[2;34m \u001b[0m \u001b[2;35m \u001b[0m\u001b[2;35mjkcook\u001b[0m\u001b[2;35m \u001b[0m \u001b[2;37m \u001b[0m\u001b[2;37mJohnston, IA, USA\u001b[0m\u001b[2;37m \u001b[0m \n",
" \n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# iNaturalist username whose observations are used below; replace with your own\n",
"USERNAME = 'jkcook'\n",
"\n",
"# Search this user's observations, collect the results, and preview the first five\n",
"my_observations = client.observations.search(\n",
"    user_id=USERNAME,\n",
").all()\n",
"pprint(my_observations[:5])"
]
},
{
"cell_type": "markdown",
"id": "a3a7b1fe-859f-428c-8c79-a335f739aa06",
"metadata": {},
"source": [
"## Basic histogram\n",
"Next, let's make a simple histogram to show your observations over time.\n",
"\n",
"Start by putting your observations into a DataFrame to make them easier to work with:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c8cc3706-c24c-4301-82b4-66dec9ffbe29",
"metadata": {},
"outputs": [],
"source": [
"# Build a one-column frame of ISO-formatted observation dates.\n",
"# Observations with no recorded date are skipped, since calling `.isoformat()`\n",
"# on a missing `observed_on` would raise an AttributeError.\n",
"# `columns` is pinned so the frame has a `date` column even with zero rows.\n",
"source = pd.DataFrame(\n",
"    [\n",
"        {'date': o.observed_on.isoformat()}\n",
"        for o in my_observations\n",
"        if o.observed_on is not None\n",
"    ],\n",
"    columns=['date'],\n",
")"
]
},
{
"cell_type": "markdown",
"id": "ae76533c-bbf1-4729-9d61-4615a1f2c651",
"metadata": {},
"source": [
"And then display it as a bar chart:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "b64ab75c-f986-4d4c-acb5-a76f655aeb88",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n"
],
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
""
],
"text/plain": [
"\u001b[1;35malt.Chart\u001b[0m\u001b[1m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1m)\u001b[0m"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Bar chart of observation counts per year-month, with a log-scaled y axis\n",
"alt.Chart(source).mark_bar().encode(\n",
"    x='yearmonth(date):T',\n",
"    y=alt.Y(\n",
"        'count()',\n",
"        axis=alt.Axis(title='Number of observations'),\n",
"        scale=alt.Scale(type='log'),\n",
"    ),\n",
").properties(width=700, height=500)"
]
},
{
"cell_type": "markdown",
"id": "adc06647-e94b-496b-a81c-28e5b00e42f0",
"metadata": {},
"source": [
"## Histogram by iconic taxon\n",
"To show a bit more information, let's break down the observations by category (iconic taxon):"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "93b0abf4-c27a-4dbb-8686-f0cc7ef0afc6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n"
],
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
""
],
"text/plain": [
"\u001b[1;35malt.Chart\u001b[0m\u001b[1m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1m)\u001b[0m"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One row per dated observation: ISO date plus its iconic taxon category.\n",
"# Observations with no recorded date are skipped, since calling `.isoformat()`\n",
"# on a missing `observed_on` would raise an AttributeError.\n",
"# NOTE(review): the taxon guard assumes `o.taxon` can be unset for unidentified\n",
"# observations; such rows are labeled 'Unknown'. Confirm against pyinaturalist.\n",
"# `columns` is pinned so the untyped `color='iconic_taxon'` encoding below can\n",
"# still infer its type from an existing column when there are zero rows.\n",
"source = pd.DataFrame(\n",
"    [\n",
"        {\n",
"            'date': o.observed_on.isoformat(),\n",
"            'iconic_taxon': o.taxon.iconic_taxon_name if o.taxon else 'Unknown',\n",
"        }\n",
"        for o in my_observations\n",
"        if o.observed_on is not None\n",
"    ],\n",
"    columns=['date', 'iconic_taxon'],\n",
")\n",
"\n",
"# Bars of observation counts per year-month, colored by iconic taxon\n",
"(\n",
"    alt.Chart(source)\n",
"    .mark_bar()\n",
"    .properties(width=700, height=500)\n",
"    .encode(\n",
"        x='yearmonth(date):T',\n",
"        y=alt.Y(\n",
"            'count()',\n",
"            scale=alt.Scale(type='symlog'),\n",
"            axis=alt.Axis(title='Number of observations'),\n",
"        ),\n",
"        color='iconic_taxon',\n",
"    )\n",
")"
]
},
{
"cell_type": "markdown",
"id": "ada6d742-1f14-4d01-bbb2-48c5df3d6e93",
"metadata": {},
"source": [
"## Observation map\n",
"Next, we can show the observations on a map. **Note:** This example only shows observations in the United States.\n",
"\n",
"First, get the coordinates for all your observations, skipping any that are missing location info:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "00e62990-49d2-4b27-828d-404814fd6b4a",
"metadata": {},
"outputs": [],
"source": [
"# One row per observation that has a location; `location[0]` is stored as the\n",
"# latitude and `location[1]` as the longitude\n",
"source = pd.DataFrame(\n",
"    [\n",
"        dict(\n",
"            latitude=obs.location[0],\n",
"            longitude=obs.location[1],\n",
"            iconic_taxon=obs.taxon.iconic_taxon_name,\n",
"        )\n",
"        for obs in my_observations\n",
"        if obs.location\n",
"    ]\n",
")"
]
},
{
"cell_type": "markdown",
"id": "fa9aa82e-d259-4994-86b1-57846fd438a1",
"metadata": {},
"source": [
"Then add the base layer. This example uses the `us_10m` dataset from [vega-datasets](https://github.com/vega/vega-datasets):"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4f3387c5-dbfa-4a14-a4d6-b7ff208274a5",
"metadata": {},
"outputs": [],
"source": [
"from vega_datasets import data\n",
"\n",
"# TopoJSON 'states' feature, referenced by the URL of the us_10m dataset\n",
"states = alt.topo_feature(data.us_10m.url, feature='states')\n",
"\n",
"# Base map: light gray shapes with white borders, using the albersUsa projection\n",
"background = (\n",
"    alt.Chart(states)\n",
"    .mark_geoshape(stroke='white', fill='lightgray')\n",
"    .project('albersUsa')\n",
"    .properties(height=500, width=850)\n",
")"
]
},
{
"cell_type": "markdown",
"id": "6a0256d4-220e-4d57-946d-723aca7137c6",
"metadata": {},
"source": [
"And finally, add your observation locations:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "89b8992f-f819-4c4e-8938-53f2fc2a2ff5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n"
],
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
""
],
"text/plain": [
"\u001b[1;35malt.LayerChart\u001b[0m\u001b[1m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1m)\u001b[0m"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Draw each observation as a circle positioned by its coordinates\n",
"points = alt.Chart(source).mark_circle().encode(\n",
"    latitude='latitude:Q',\n",
"    longitude='longitude:Q',\n",
")\n",
"\n",
"# Layer the points over the base map and display the combined chart\n",
"alt.layer(background, points)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}