Building on top of matplotlib#

Read in Datasets#

  1. WAS 2020 Data

  2. Subset of an AVIRIS image

import pandas as pd
import rasterio
# Load the example WAS spreadsheet: read the "INPUT" sheet, dropping the 7 footer rows
was_2020_filepath = "./data/SARP 2020 final.xlsx"
was_2020 = pd.read_excel(was_2020_filepath, sheet_name="INPUT", skipfooter=7)
# Load the example AVIRIS subset; the dataset is closed when the `with` block exits
with rasterio.open('./data/subset_f180628t01p00r02_corr_v1k1_img') as src:
    metadata, bands = src.meta, src.read()

Accessing the matplotlib in pandas#

pandas’ plotting tutorial. Useful for reference or for skimming to see more of what the library can do.

# Processing the dataframe: keep only the Flagstaff rows and the three columns
# needed below, then index the result by date
is_flagstaff = was_2020['Location'] == 'Mars Hill/Lowell Observatory, Flagstaff, Arizona'
flagstaff_co = was_2020.loc[is_flagstaff, ['Date', 'CO (ppbv)', 'CH4 (ppmv height)']]
flagstaff_co = flagstaff_co.set_index('Date')
# Making the plot
# !! .plot() hands back a matplotlib axes object, so it can be customized further !!
ax = flagstaff_co['CO (ppbv)'].plot(xlabel='date', ylabel='CO ppbv')
ax.set_title('CO over time for Flagstaff, AZ')
Text(0.5, 1.0, 'CO over time for Flagstaff, AZ')
../../_images/dc491db68b8aa0d2d0ddae7ead19c6d738ee8ad67edf999ad5eeb0a170b0898b.png
import matplotlib.pyplot as plt
# pandas can draw into axes that matplotlib created: build a 2-row, 1-column
# grid first, then pass each axes to DataFrame.plot() through `ax=`
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)
for column, target_ax in (('CH4 (ppmv height)', ax1), ('CO (ppbv)', ax2)):
    flagstaff_co.plot(y=column, ax=target_ax, xlabel='date')
fig.subplots_adjust(hspace=0.7)  # Add more space between plots for labels
../../_images/aaa128c13f94bdc544bafce5a3634def256f8acca5e462d9c5f35a09fae163e6.png

fig.subplots_adjust() inputs

Scatter plot example#

import numpy as np
# Prepare the data: blank out every row whose butane value is negative or
# above 1000. The boolean mask selects whole rows, so all columns of a flagged
# row become NaN, not only the butane column.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
was_2020[(was_2020['n-Butane (E/B)'] < 0) | (was_2020['n-Butane (E/B)'] > 1000)] = np.nan
was_2020[(was_2020['i-Butane (E/B)'] < 0) | (was_2020['i-Butane (E/B)'] > 1000)] = np.nan

alpha sets the opacity of the points, meaning that it dictates how much you can see through them. I like using it on scatter plots because it makes the density of points easier to see when they overlap.

# Scatter of the two butane columns; alpha=0.3 makes overlapping points show up darker
was_2020.plot(kind='scatter', x='i-Butane (E/B)', y='n-Butane (E/B)', alpha=0.3)
plt.title('Butane ratios')
Text(0.5, 1.0, 'Butane ratios')
../../_images/7daa7862efcd0602c4a427af835f8953321e5d74fd5af3e57b900a4b70895245.png
# DataFrame.plot() with no column selection; kind='line' is the documented
# default, spelled out here. The returned Axes is echoed as the cell output.
was_2020.plot(kind='line')
<Axes: >
../../_images/59d8c38299f32261fbb03638719aced8dea9cf365aa5245a1f0ad69bd1ab9273.png

seaborn#

Trading control for ease#

matplotlib really lets you do almost anything you can imagine to plots. While the control is nice, you don’t always need it. If instead you are interested in making plots faster, you might use the library seaborn.

Note: seaborn is most useful if you are using data in a pandas dataframe. Creating plots from seaborn for numpy matrices is totally fine, but you don’t get the same level of benefit as you do with column labelled pandas data.

Installation#

conda install -c conda-forge -n lessons seaborn --yes

Feature: known for built in statistics support

Example with a pandas dataframe#

import seaborn as sns  # `sns` is used by every seaborn example but no earlier cell imports it

# Organizing the dataframe: keep only the rows whose 'Weather' entry is exactly
# 'Cloudy' or 'clear' (capitalisation as spelled in these literals)
was_2020_subset = was_2020[was_2020['Weather'].isin(['Cloudy', 'clear'])]
# Plotting with axis labels in one line
sns.relplot(data=was_2020_subset, x='i-Pentane (E/B)', y='n-Pentane (E/B)')
<seaborn.axisgrid.FacetGrid at 0x7effb2a11c90>
../../_images/d9cb8b91c894e0eeb75547a07860aafc0d189791d0b64670195c003091ac5530.png
# A line plot version
# NOTE(review): the traceback recorded below shows this call raising
# "OptionError: No such keys(s): 'mode.use_inf_as_null'" from seaborn's own
# use of pd.option_context. Presumably the installed seaborn is older than the
# installed pandas, which no longer has that option - TODO confirm, upgrade
# seaborn, and re-run so the recorded output is the plot rather than the error.
sns.relplot(data=was_2020_subset, x='i-Pentane (E/B)', y='n-Pentane (E/B)', kind='line')
---------------------------------------------------------------------------
OptionError                               Traceback (most recent call last)
Cell In[16], line 2
      1 # A line plot version
----> 2 sns.relplot(data=was_2020_subset, x='i-Pentane (E/B)', y='n-Pentane (E/B)', kind='line')

File /srv/conda/envs/notebook/lib/python3.11/site-packages/seaborn/relational.py:955, in relplot(data, x, y, hue, size, style, units, row, col, col_wrap, row_order, col_order, palette, hue_order, hue_norm, sizes, size_order, size_norm, markers, dashes, style_order, legend, kind, height, aspect, facet_kws, **kwargs)
    946 g = FacetGrid(
    947     data=full_data.dropna(axis=1, how="all"),
    948     **grid_kws,
   (...)
    951     **facet_kws
    952 )
    954 # Draw the plot
--> 955 g.map_dataframe(func, **plot_kws)
    957 # Label the axes, using the original variables
    958 g.set(xlabel=variables.get("x"), ylabel=variables.get("y"))

File /srv/conda/envs/notebook/lib/python3.11/site-packages/seaborn/axisgrid.py:819, in FacetGrid.map_dataframe(self, func, *args, **kwargs)
    816     kwargs["data"] = data_ijk
    818     # Draw the plot
--> 819     self._facet_plot(func, ax, args, kwargs)
    821 # For axis labels, prefer to use positional args for backcompat
    822 # but also extract the x/y kwargs and use if no corresponding arg
    823 axis_labels = [kwargs.get("x", None), kwargs.get("y", None)]

File /srv/conda/envs/notebook/lib/python3.11/site-packages/seaborn/axisgrid.py:848, in FacetGrid._facet_plot(self, func, ax, plot_args, plot_kwargs)
    846     plot_args = []
    847     plot_kwargs["ax"] = ax
--> 848 func(*plot_args, **plot_kwargs)
    850 # Sort out the supporting information
    851 self._update_legend_data(ax)

File /srv/conda/envs/notebook/lib/python3.11/site-packages/seaborn/relational.py:645, in lineplot(data, x, y, hue, size, style, units, palette, hue_order, hue_norm, sizes, size_order, size_norm, dashes, markers, style_order, estimator, errorbar, n_boot, seed, orient, sort, err_style, err_kws, legend, ci, ax, **kwargs)
    642 color = kwargs.pop("color", kwargs.pop("c", None))
    643 kwargs["color"] = _default_color(ax.plot, hue, color, kwargs)
--> 645 p.plot(ax, kwargs)
    646 return ax

File /srv/conda/envs/notebook/lib/python3.11/site-packages/seaborn/relational.py:423, in _LinePlotter.plot(self, ax, kws)
    415 # TODO How to handle NA? We don't want NA to propagate through to the
    416 # estimate/CI when some values are present, but we would also like
    417 # matplotlib to show "gaps" in the line when all values are missing.
   (...)
    420 
    421 # Loop over the semantic subsets and add to the plot
    422 grouping_vars = "hue", "size", "style"
--> 423 for sub_vars, sub_data in self.iter_data(grouping_vars, from_comp_data=True):
    425     if self.sort:
    426         sort_vars = ["units", orient, other]

File /srv/conda/envs/notebook/lib/python3.11/site-packages/seaborn/_oldcore.py:1028, in VectorPlotter.iter_data(self, grouping_vars, reverse, from_comp_data, by_facet, allow_empty, dropna)
   1023 grouping_vars = [
   1024     var for var in grouping_vars if var in self.variables
   1025 ]
   1027 if from_comp_data:
-> 1028     data = self.comp_data
   1029 else:
   1030     data = self.plot_data

File /srv/conda/envs/notebook/lib/python3.11/site-packages/seaborn/_oldcore.py:1119, in VectorPlotter.comp_data(self)
   1117 grouped = self.plot_data[var].groupby(self.converters[var], sort=False)
   1118 for converter, orig in grouped:
-> 1119     with pd.option_context('mode.use_inf_as_null', True):
   1120         orig = orig.dropna()
   1121         if var in self.var_levels:
   1122             # TODO this should happen in some centralized location
   1123             # it is similar to GH2419, but more complicated because
   1124             # supporting `order` in categorical plots is tricky

File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/_config/config.py:480, in option_context.__enter__(self)
    479 def __enter__(self) -> None:
--> 480     self.undo = [(pat, _get_option(pat)) for pat, val in self.ops]
    482     for pat, val in self.ops:
    483         _set_option(pat, val, silent=True)

File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/_config/config.py:480, in <listcomp>(.0)
    479 def __enter__(self) -> None:
--> 480     self.undo = [(pat, _get_option(pat)) for pat, val in self.ops]
    482     for pat, val in self.ops:
    483         _set_option(pat, val, silent=True)

File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/_config/config.py:146, in _get_option(pat, silent)
    145 def _get_option(pat: str, silent: bool = False) -> Any:
--> 146     key = _get_single_key(pat, silent)
    148     # walk the nested dict
    149     root, k = _get_root(key)

File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/_config/config.py:132, in _get_single_key(pat, silent)
    130     if not silent:
    131         _warn_if_deprecated(pat)
--> 132     raise OptionError(f"No such keys(s): {repr(pat)}")
    133 if len(keys) > 1:
    134     raise OptionError("Pattern matched multiple keys")

OptionError: No such keys(s): 'mode.use_inf_as_null'
../../_images/0d9968e9074952bd8fda5b82ccbbf68bbed67a77621725c9015004fdb7cc981d.png
# Extra dataframe columns drive the styling: `hue` colors the points by 'State'
# and `col` draws one side-by-side panel per 'Weather' value
sns.relplot(
    data=was_2020_subset,
    x='i-Pentane (E/B)',
    y='n-Pentane (E/B)',
    hue='State',
    col='Weather',
)
<seaborn.axisgrid.FacetGrid at 0x21c703f29a0>
../../_images/fc6f56a2d4881a9677b18a01df675c41d6851635a848ccf7afc48049a1cd57ae.png

Example with numpy arrays#

You can use seaborn with matrix data, but in the case of numpy arrays you don’t get quite as much benefit. The colors and styles are nice, but you don’t get the axis labels with just a matrix of data.

# Take two 1-D slices of the image array: everything along the first axis at
# fixed positions (100, 135) and (200, 330) on the other two axes.
# NOTE(review): presumably the first axis is the spectral band - confirm against the rasterio read() layout
band_100_135, band_200_330 = bands[:, 100, 135], bands[:, 200, 330]
# Plain arrays carry no column names, so seaborn has nothing to label the axes with
sns.relplot(x=band_100_135, y=band_200_330)
<seaborn.axisgrid.FacetGrid at 0x21c7038bb20>
../../_images/2c505b45e29ac5feb7340c7d9e08375213d42f4a16b3fde9d1fae18bf2109df1.png

Customizing#

Because seaborn is built on top of matplotlib, you can still choose to customize the plots with the same commands as matplotlib.

# seaborn draws the figure; the usual pyplot calls then adjust the current axes
sns.relplot(data=was_2020_subset, x='i-Pentane (E/B)', y='n-Pentane (E/B)', alpha=0.4)
plt.title('Pentane Ratios')
plt.xlim(left=0, right=1200)  # change the x axis range
plt.ylim(bottom=0, top=850)  # change the y axis range
(0.0, 850.0)
../../_images/3af10020815909cba41fd48754ef92880b2b68c9756a904c7839d63159b18e79.png