{ "cells": [ { "cell_type": "markdown", "id": "6623b237-1e3a-4672-bf62-a1a395daec3b", "metadata": {}, "source": [ "# Tutorial 3: Data Visualizations\n", "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/pyinat/pyinaturalist/main?filepath=examples%2FTutorial_3_Data_Visualizations.ipynb)\n", "\n", "This notebook will show you a few basic visualizations you can make with your own observation data.\n", "\n", "We'll do this with [Pandas](https://pandas.pydata.org/docs/getting_started/index.html) and [Altair](https://altair-viz.github.io). Don't worry if you're not familiar with those tools; this is just to demonstrate the kinds of things you can do with your data." ] }, { "cell_type": "code", "execution_count": 1, "id": "1a77766f-3096-49da-8860-c3686859f91e", "metadata": {}, "outputs": [], "source": [ "import altair as alt\n", "import pandas as pd\n", "\n", "from pyinaturalist import iNatClient, pprint\n", "\n", "# enable_logging()\n", "client = iNatClient()" ] }, { "cell_type": "markdown", "id": "32730c39-027a-4e89-9fd8-0feb284bfcf3", "metadata": {}, "source": [ "## Observation data\n", "We'll start with all of your own observation data:" ] }, { "cell_type": "code", "execution_count": 2, "id": "cde06918-2e13-41b6-9ce6-2f5be94551c5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
                                                                                                                   \n",
       "  ID         Taxon ID   Taxon                                           Observed on    User     Location           \n",
       " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ \n",
       "  30688807   1415100    Cleomella serrulata (Rocky Mountain Beeplant)   Aug 12, 2019   jkcook   Johnston, IA, USA  \n",
       "  30688955   47912      Asclepias tuberosa (Butterfly Milkweed)         Aug 12, 2019   jkcook   Johnston, IA, USA  \n",
       "  30689111   60251      Verbena hastata (Blue Vervain)                  Aug 12, 2019   jkcook   Johnston, IA, USA  \n",
       "  30689221   121968     Andropogon gerardi (Big Bluestem)               Aug 12, 2019   jkcook   Johnston, IA, USA  \n",
       "  30689306   121968     Andropogon gerardi (Big Bluestem)               Aug 12, 2019   jkcook   Johnston, IA, USA  \n",
       "                                                                                                                   \n",
       "
\n" ], "text/plain": [ " \n", " \u001b[1;37m \u001b[0m\u001b[1;37mID \u001b[0m\u001b[1;37m \u001b[0m \u001b[1;37m \u001b[0m\u001b[1;37mTaxon ID\u001b[0m\u001b[1;37m \u001b[0m \u001b[1;37m \u001b[0m\u001b[1;37mTaxon \u001b[0m\u001b[1;37m \u001b[0m \u001b[1;37m \u001b[0m\u001b[1;37mObserved on \u001b[0m\u001b[1;37m \u001b[0m \u001b[1;37m \u001b[0m\u001b[1;37mUser \u001b[0m\u001b[1;37m \u001b[0m \u001b[1;37m \u001b[0m\u001b[1;37mLocation \u001b[0m\u001b[1;37m \u001b[0m \n", " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ \n", " \u001b[2;36m \u001b[0m\u001b[2;36m30688807\u001b[0m\u001b[2;36m \u001b[0m \u001b[2;36m \u001b[0m\u001b[2;36m1415100 \u001b[0m\u001b[2;36m \u001b[0m \u001b[2;32m \u001b[0m\u001b[2;3;32mCleomella serrulata\u001b[0m\u001b[2;32m (Rocky Mountain Beeplant)\u001b[0m\u001b[2;32m \u001b[0m \u001b[2;34m \u001b[0m\u001b[2;34mAug 12, 2019\u001b[0m\u001b[2;34m \u001b[0m \u001b[2;35m \u001b[0m\u001b[2;35mjkcook\u001b[0m\u001b[2;35m \u001b[0m \u001b[2;37m \u001b[0m\u001b[2;37mJohnston, IA, USA\u001b[0m\u001b[2;37m \u001b[0m \n", " \u001b[36m \u001b[0m\u001b[36m30688955\u001b[0m\u001b[36m \u001b[0m \u001b[36m \u001b[0m\u001b[36m47912 \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[3;32mAsclepias tuberosa\u001b[0m\u001b[32m (Butterfly Milkweed) \u001b[0m\u001b[32m \u001b[0m \u001b[34m \u001b[0m\u001b[34mAug 12, 2019\u001b[0m\u001b[34m \u001b[0m \u001b[35m \u001b[0m\u001b[35mjkcook\u001b[0m\u001b[35m \u001b[0m \u001b[37m \u001b[0m\u001b[37mJohnston, IA, USA\u001b[0m\u001b[37m \u001b[0m \n", " \u001b[2;36m \u001b[0m\u001b[2;36m30689111\u001b[0m\u001b[2;36m \u001b[0m \u001b[2;36m \u001b[0m\u001b[2;36m60251 \u001b[0m\u001b[2;36m \u001b[0m \u001b[2;32m \u001b[0m\u001b[2;3;32mVerbena hastata\u001b[0m\u001b[2;32m (Blue Vervain) \u001b[0m\u001b[2;32m \u001b[0m \u001b[2;34m \u001b[0m\u001b[2;34mAug 12, 2019\u001b[0m\u001b[2;34m \u001b[0m \u001b[2;35m 
\u001b[0m\u001b[2;35mjkcook\u001b[0m\u001b[2;35m \u001b[0m \u001b[2;37m \u001b[0m\u001b[2;37mJohnston, IA, USA\u001b[0m\u001b[2;37m \u001b[0m \n", " \u001b[36m \u001b[0m\u001b[36m30689221\u001b[0m\u001b[36m \u001b[0m \u001b[36m \u001b[0m\u001b[36m121968 \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[3;32mAndropogon gerardi\u001b[0m\u001b[32m (Big Bluestem) \u001b[0m\u001b[32m \u001b[0m \u001b[34m \u001b[0m\u001b[34mAug 12, 2019\u001b[0m\u001b[34m \u001b[0m \u001b[35m \u001b[0m\u001b[35mjkcook\u001b[0m\u001b[35m \u001b[0m \u001b[37m \u001b[0m\u001b[37mJohnston, IA, USA\u001b[0m\u001b[37m \u001b[0m \n", " \u001b[2;36m \u001b[0m\u001b[2;36m30689306\u001b[0m\u001b[2;36m \u001b[0m \u001b[2;36m \u001b[0m\u001b[2;36m121968 \u001b[0m\u001b[2;36m \u001b[0m \u001b[2;32m \u001b[0m\u001b[2;3;32mAndropogon gerardi\u001b[0m\u001b[2;32m (Big Bluestem) \u001b[0m\u001b[2;32m \u001b[0m \u001b[2;34m \u001b[0m\u001b[2;34mAug 12, 2019\u001b[0m\u001b[2;34m \u001b[0m \u001b[2;35m \u001b[0m\u001b[2;35mjkcook\u001b[0m\u001b[2;35m \u001b[0m \u001b[2;37m \u001b[0m\u001b[2;37mJohnston, IA, USA\u001b[0m\u001b[2;37m \u001b[0m \n", " \n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Replace with your own username\n", "USERNAME = 'jkcook'\n", "my_observations = client.observations.search(user_id=USERNAME).all()\n", "pprint(my_observations[:5])" ] }, { "cell_type": "markdown", "id": "a3a7b1fe-859f-428c-8c79-a335f739aa06", "metadata": {}, "source": [ "## Basic histogram\n", "Next, let's make a simple histogram to show your observations over time.\n", "\n", "Start by putting your observations into a DataFrame to make them easier to work with:" ] }, { "cell_type": "code", "execution_count": 3, "id": "c8cc3706-c24c-4301-82b4-66dec9ffbe29", "metadata": {}, "outputs": [], "source": [ "source = pd.DataFrame([{'date': o.observed_on.isoformat()} for o in my_observations])" ] }, { "cell_type": "markdown", "id": "ae76533c-bbf1-4729-9d61-4615a1f2c651", "metadata": {}, 
"source": [ "And then display it as a bar chart:" ] }, { "cell_type": "code", "execution_count": 4, "id": "b64ab75c-f986-4d4c-acb5-a76f655aeb88", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "\n",
       "
\n", "" ], "text/plain": [ "\u001b[1;35malt.Chart\u001b[0m\u001b[1m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1m)\u001b[0m" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(\n", " alt.Chart(source)\n", " .mark_bar()\n", " .properties(width=700, height=500)\n", " .encode(\n", " x='yearmonth(date):T',\n", " y=alt.Y(\n", " 'count()',\n", " scale=alt.Scale(type='log'),\n", " axis=alt.Axis(title='Number of observations'),\n", " ),\n", " )\n", ")" ] }, { "cell_type": "markdown", "id": "adc06647-e94b-496b-a81c-28e5b00e42f0", "metadata": {}, "source": [ "## Histogram by iconic taxon\n", "To show a bit more information, let's break down the observations by category (iconic taxon):" ] }, { "cell_type": "code", "execution_count": 5, "id": "93b0abf4-c27a-4dbb-8686-f0cc7ef0afc6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "\n",
       "
\n", "" ], "text/plain": [ "\u001b[1;35malt.Chart\u001b[0m\u001b[1m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1m)\u001b[0m" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "source = pd.DataFrame(\n", " [\n", " {'date': o.observed_on.isoformat(), 'iconic_taxon': o.taxon.iconic_taxon_name}\n", " for o in my_observations\n", " ]\n", ")\n", "(\n", " alt.Chart(source)\n", " .mark_bar()\n", " .properties(width=700, height=500)\n", " .encode(\n", " x='yearmonth(date):T',\n", " y=alt.Y(\n", " 'count()',\n", " scale=alt.Scale(type='symlog'),\n", " axis=alt.Axis(title='Number of observations'),\n", " ),\n", " color='iconic_taxon',\n", " )\n", ")" ] }, { "cell_type": "markdown", "id": "ada6d742-1f14-4d01-bbb2-48c5df3d6e93", "metadata": {}, "source": [ "## Observation map\n", "Next, we can show the observations on a map. **Note:** This example only shows observations in the United States.\n", "\n", "First, get the coordinates for all your observations, skipping any that are missing location info:" ] }, { "cell_type": "code", "execution_count": 6, "id": "00e62990-49d2-4b27-828d-404814fd6b4a", "metadata": {}, "outputs": [], "source": [ "source = pd.DataFrame(\n", " [\n", " {\n", " 'latitude': o.location[0],\n", " 'longitude': o.location[1],\n", " 'iconic_taxon': o.taxon.iconic_taxon_name,\n", " }\n", " for o in my_observations\n", " if o.location\n", " ]\n", ")" ] }, { "cell_type": "markdown", "id": "fa9aa82e-d259-4994-86b1-57846fd438a1", "metadata": {}, "source": [ "Then add the base layer. 
This example uses the `us_10m` dataset from [vega-datasets](https://github.com/vega/vega-datasets):" ] }, { "cell_type": "code", "execution_count": 7, "id": "4f3387c5-dbfa-4a14-a4d6-b7ff208274a5", "metadata": {}, "outputs": [], "source": [ "from vega_datasets import data\n", "\n", "states = alt.topo_feature(data.us_10m.url, feature='states')\n", "background = (\n", " alt.Chart(states)\n", " .mark_geoshape(fill='lightgray', stroke='white')\n", " .properties(width=850, height=500)\n", " .project('albersUsa')\n", ")" ] }, { "cell_type": "markdown", "id": "6a0256d4-220e-4d57-946d-723aca7137c6", "metadata": {}, "source": [ "And finally, add your observation locations:" ] }, { "cell_type": "code", "execution_count": 8, "id": "89b8992f-f819-4c4e-8938-53f2fc2a2ff5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "\n",
       "
\n", "" ], "text/plain": [ "\u001b[1;35malt.LayerChart\u001b[0m\u001b[1m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1m)\u001b[0m" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "points = (\n", " alt.Chart(source)\n", " .mark_circle()\n", " .encode(\n", " longitude='longitude:Q',\n", " latitude='latitude:Q',\n", " )\n", ")\n", "\n", "# Show the combined background + points\n", "background + points" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.12" } }, "nbformat": 4, "nbformat_minor": 5 }