Source code for stratmc.plotting

import math
import sys

import arviz as az
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import gridspec
from matplotlib import pyplot as plt
from scipy.ndimage import gaussian_filter as gaussian
from scipy.stats import gaussian_kde

from stratmc.data import accumulation_rate, clean_data
from stratmc.inference import (
    age_range_to_height,
    calculate_lengthscale_stability,
    calculate_proxy_signal_stability,
    count_samples,
    find_gaps,
    map_ages_to_section,
)

pd.options.mode.chained_assignment = None

[docs] def proxy_strat(sample_df, ages_df, proxy = 'd13c', plot_constraints = True, plot_excluded_samples = False, cmap = 'Spectral', legend = False, **kwargs): """ Plot stratigraphic data (proxy observations and age constraints) for each section. .. plot:: from stratmc.data import load_object from stratmc.plotting import proxy_strat example_sample_path = 'examples/example_sample_df' example_ages_path = 'examples/example_ages_df' sample_df = load_object(example_sample_path) ages_df = load_object(example_ages_path) proxy_strat(sample_df, ages_df, proxy = 'd13c') plt.show() Parameters ---------- sample_df: pandas.DataFrame :class:`pandas.DataFrame` containing proxy data for all sections. ages_df: pandas.DataFrame :class:`pandas.DataFrame` containing age constraints for all sections. proxy: str, optional Name of proxy. Defaults to 'd13c'. sections: list(str) or numpy.array(str), optional List of sections to plot. Defaults to all sections in ``sample_df``. plot_constraints: bool, optional Whether to plot age constraints as dashed lines. Ages are printed above dashed lines by defalut; to turn off age labels, pass ``print_ages = False``. plot_excluded_samples: bool, optional Whether to plot proxy observations for excluded samples (``Exclude?`` is ``True`` in ``sample_df``) as red dots. Defaults to ``False``. cmap: str, optional Name of seaborn color palette to use for sections. Defaults to 'Spectral'. legend: bool, optional Generate a legend. Defaults to ``False``. print_ages: bool, optional Print ages as text on figure. Returns ------- fig: matplotlib.pyplot.figure Figure with observations for each section. """ if 'sections' in kwargs: sections = list(kwargs['sections']) else: sections = np.unique(sample_df[~np.isnan(sample_df[proxy])]['section']) if 'print_ages' in kwargs: print_ages = kwargs['print_ages'] else: print_ages = True if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 cs = {} pal = sns.color_palette(cmap,n_colors=len(sections)) cols = 4 N = len(sections) rows = int(math.ceil(N/cols)) gs = gridspec.GridSpec(rows, cols) fig = plt.figure(figsize = (10, 4.5*rows)) for n in range(N): section = sections[n] cs[section] = pal[n] ax = fig.add_subplot(gs[n]) if len(sample_df[proxy][(sample_df['section'] == section) & (sample_df['Exclude?'] == True)]) > 0: if plot_excluded_samples: ax.scatter(sample_df[proxy][(sample_df['section'] == section) & (sample_df['Exclude?'] == True)], sample_df['height'][(sample_df['section'] == section) & (sample_df['Exclude?'] == True)], color = 'red', edgecolors = 'k', label = 'Excluded sample') ax.scatter(sample_df[proxy][(sample_df['section'] == section) & (sample_df['Exclude?'] != True)], sample_df['height'][(sample_df['section'] == section) & (sample_df['Exclude?'] != True)], color = cs[section], edgecolors = 'k', label = 'Sample') xl = ax.get_xlim() yl = ax.get_ylim() if plot_constraints: age_heights = ages_df['height'][(ages_df['section']==section) & (~ages_df['Exclude?']) & (~ages_df['intermediate detrital?']) & (~ages_df['intermediate intrusive?']) & (~ages_df['depositional?'])] for h in age_heights: ax.axhline(h, color = cs[section], linestyle = 'dashed', label = 'Depositional age', zorder = 2) detrital_age_heights = ages_df['height'][(ages_df['section']==section) & (ages_df['Exclude?'] == False) & (ages_df['intermediate detrital?'] == True)] for h in detrital_age_heights: ax.axhline(h, color = cs[section], linestyle = 'dotted', label = 'Detrital age', zorder = 2) intrusive_age_heights = ages_df['height'][(ages_df['section']==section) & (ages_df['Exclude?'] == False) & (ages_df['intermediate intrusive?'] == True)] for h in intrusive_age_heights: ax.axhline(h, color = cs[section], linestyle = 'dashdot', label = 'Intrusive age', zorder = 2) if legend: ax.legend() x_range = xl[1] - xl[0] y_range = yl[1] - yl[0] if print_ages: for age, height, sigma, dist_type, param1, param2 in zip(ages_df['age'][(ages_df['section']==section) & (~ages_df['Exclude?']) & (~ages_df['depositional?'])], ages_df['height'][(ages_df['section']==section) & (ages_df['Exclude?'] != True) & (~ages_df['depositional?'])], ages_df['age_std'][(ages_df['section']==section) & (ages_df['Exclude?'] != True) & (~ages_df['depositional?'])], ages_df['distribution_type'][(ages_df['section']==section) & (ages_df['Exclude?'] != True) & (~ages_df['depositional?'])], ages_df['param_1'][(ages_df['section']==section) & (ages_df['Exclude?'] != True) & (~ages_df['depositional?'])], ages_df['param_2'][(ages_df['section']==section) & (ages_df['Exclude?'] != True) & (~ages_df['depositional?'])]): if dist_type == 'Normal': ax.text(xl[0] + x_range * 0.05, # 0.45 height + y_range * 0.01, s = f'{age:.1f}' + r'$\pm$' + f'{2*sigma:.1f} Ma', fontsize = 10) elif dist_type == 'Uniform': ax.text(xl[0] + x_range * 0.05, # 0.45 height + y_range * 0.01, s = f'{param1:.1f} - {param2:.1f} Ma', fontsize = 10) ax.set_xlim(xl) ax.set_ylabel('Height (m)', fontsize = fs) if proxy == 'd13c': ax.set_xlabel(r'$\delta^{13}$C$_{\mathrm{carb}} (‰)$', fontsize = fs) else: ax.set_xlabel(proxy, fontsize = fs) ax.set_title(section, fontsize = fs) ax.tick_params(bottom = True, top = False, left = True, right = False, direction = 'out', labelsize = fs, width = 1.5) ax.locator_params(axis='x', nbins = 5) fig.tight_layout() plt.subplots_adjust(wspace=0.45) return fig
[docs] def proxy_inference(sample_df, ages_df, full_trace, legend = True, plot_constraints = False, plot_samples = False, plot_excluded_samples = False, plot_mean = False, plot_mle = False, orientation = 'horizontal', marker_size = 20, section_legend = False, section_cmap = 'Spectral', **kwargs): """ Plot the inferred proxy signal over time. .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import proxy_inference full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' example_ages_path = 'examples/example_ages_df' sample_df = load_object(example_sample_path) ages_df = load_object(example_ages_path) proxy_inference(sample_df, ages_df, full_trace, proxy = 'd13c') plt.show() Parameters ---------- sample_df: pandas.DataFrame :class:`pandas.DataFrame` containing proxy data for all sections. ages_df: pandas.DataFrame :class:`pandas.DataFrame` containing age constraints for all sections. full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. proxy: str, optional Tracer to plot; only required if more than one proxy was included in the inference. legend: bool, optional Generate a legend. Defaults to ``True``. plot_constraints: bool, optional Plot age constraints for each section as dashed lines. Defaults to ``False``. plot_samples: bool, optional Plot proxy observations by most likely posterior age. Defaults to ``False``. plot_excluded_samples: bool, optional Plot proxy observations that were excluded from the inference (``Exclude?`` is ``True`` in ``sample_df``). Defaults to ``False``. plot_mean: bool, optional Plot the mean as a dashed line. Defaults to ``False``. plot_mle: bool, optional Plot the maximum likelihood estimate. Defaults to ``False``. orientation: str, optional Orientation of figure ('horizontal' with age on the x-axis, or 'vertical' with age on the y-axis). Defaults to 'horizontal'. marker_size: int, optional Size of markers if ``plot_samples`` is ``True``. Defaults to 20. section_legend: bool, optional Include section names in the legend (if ``plot_samples`` is ``True``). Defaults to ``False``. section_cmap: str, optional Name of seaborn color palette to use for sections (if ``plot_samples`` is ``True``). Defaults to 'Spectral'. Returns ------- fig: matplotlib.pyplot.figure Figure with the proxy signal inference. """ if orientation == 'horizontal': horizontal = True vertical = False if orientation == 'vertical': horizontal = False vertical = True if 'figsize' in kwargs: figsize = kwargs['figsize'] elif horizontal: figsize = (9, 5) elif vertical: figsize = (5, 9) if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'proxy' in kwargs: proxy = kwargs['proxy'] else: proxy = proxies[0] if 'sections' in kwargs: sections = list(kwargs['sections']) else: sections = np.unique(sample_df[~np.isnan(sample_df[proxy])]['section']) sample_df, ages_df = clean_data(sample_df, ages_df, proxies, sections) ages = full_trace.X_new.X_new.values fig=plt.figure(figsize=figsize) ax = fig.gca() cs = {} pal = sns.color_palette(section_cmap,n_colors=len(sections)) for i in range(len(sections)): section = sections[i] cs[section] = pal[i] min_ages = [] max_ages = [] if plot_samples: # by max likelihood plotted_excluded = False plotted_data = False for section in sections: section_df = sample_df[sample_df['section']==section] proxy_idx = (~np.isnan(section_df[proxy])) excluded_idx = (~np.isnan(section_df[proxy])) & (section_df['Exclude?']) sec_ages = az.extract(full_trace.posterior)[str(section)+'_ages'].values max_like = np.zeros(sec_ages.shape[0]) for i in np.arange(sec_ages.shape[0]): sample_ages = sec_ages[i,:] dx = np.linspace(np.min(sample_ages), np.max(sample_ages), 1000) max_like[i] = dx[np.argmax(gaussian_kde(sample_ages, bw_method = 1)(dx))] if section_legend: label = section elif not plotted_data: label = 'Sample (most likely age)' else: label = '_nolegend_' if vertical: ax.scatter(section_df[proxy][proxy_idx & ~excluded_idx], max_like[proxy_idx & ~excluded_idx], #np.mean(sec_ages, axis = 1), s = marker_size, color = cs[section], label = label, edgecolors = 'k', lw = 0.5, zorder = 5) plotted_data = True if (plot_excluded_samples) and (len(section_df[proxy][excluded_idx]) > 0): if not plotted_excluded: excluded_label = 'Excluded samples' else: excluded_label = '_nolegend' ax.scatter(section_df[proxy][excluded_idx & proxy_idx], max_like[excluded_idx & proxy_idx], s = marker_size, color = 'none', label = excluded_label, edgecolors = cs[section], lw = 0.5, zorder = 5) plotted_excluded = True if horizontal: ax.scatter(max_like[proxy_idx & ~excluded_idx], section_df[proxy][proxy_idx & ~excluded_idx], s = marker_size, color = cs[section], label = label, lw = 0.5, edgecolors = 'k', zorder = 5) plotted_data = True if (plot_excluded_samples) and (len(section_df[proxy][proxy_idx & excluded_idx]) > 0): if not plotted_excluded: excluded_label = 'Excluded samples' else: excluded_label = '_nolegend' ax.scatter(max_like[excluded_idx & proxy_idx], section_df[proxy][excluded_idx & proxy_idx], s = marker_size, color = 'none', label = excluded_label, edgecolors = cs[section], lw = 0.5, zorder = 5) plotted_excluded = True min_ages = np.append(min_ages, np.min(np.mean(sec_ages,axis = 1))) max_ages = np.append(max_ages, np.max(np.mean(sec_ages, axis = 1))) proxy_pred = az.extract(full_trace.posterior_predictive)['f_pred_' + proxy].values lo = np.percentile(proxy_pred, 2.5, axis=1).flatten() hi = np.percentile(proxy_pred, 97.5, axis=1).flatten() if vertical: ax.fill_betweenx(ages.ravel(), hi, lo, color=(.95,.95,.95), label='95% envelope', linestyle = 'dotted', lw = 1.5, edgecolor = 'k', zorder = 0) if horizontal: ax.fill_between(ages.ravel(), hi, lo, color=(.95,.95,.95), label='95% envelope', linestyle = 'dotted', edgecolor = 'k', lw = 1.5, zorder = 0) lo = np.percentile(proxy_pred, 17, axis=1).flatten() hi = np.percentile(proxy_pred, 100-17, axis=1).flatten() if vertical: ax.fill_betweenx(ages.ravel(), hi, lo, color=(.8,.8,.8), linestyle = 'dashed', label='66% envelope', edgecolor = 'k', lw = 1.5, zorder = 1) if horizontal: ax.fill_between(ages.ravel(), hi, lo, color=(.8,.8,.8), linestyle = 'dashed', label='66% envelope', edgecolor = 'k', lw = 1.5, zorder = 1) lo = np.percentile(proxy_pred, 33.5, axis=1).flatten() hi = np.percentile(proxy_pred, 100-33.5, axis=1).flatten() if vertical: ax.fill_betweenx(ages.ravel(), hi, lo, color=(.65,.65,.65), label='33% envelope', edgecolor = 'k', lw = 1.5, zorder = 2) if horizontal: ax.fill_between(ages.ravel(), hi, lo, color=(.65,.65,.65), label='33% envelope', edgecolor = 'k', lw = 1.5, zorder = 2) if plot_mean: if horizontal: ax.plot(ages.ravel(), np.mean(proxy_pred,axis=1).flatten(), color = 'k', linestyle = 'solid', lw = 2, zorder = 3, label = 'Mean ' + str(proxy)) if vertical: ax.plot(np.mean(proxy_pred, axis=1).flatten(), ages.ravel(), color = 'k', linestyle = 'solid', lw = 2, zorder = 3, label = 'Mean ' + str(proxy)) if plot_mle: dy = np.linspace(np.min(proxy_pred), np.max(proxy_pred), 200) max_like = np.zeros(ages.size) for i in np.arange(ages.size): time_slice = proxy_pred[i,:] max_like[i] = dy[np.argmax(gaussian_kde(time_slice, bw_method = 1)(dy))] max_like = gaussian(max_like, 2) if horizontal: ax.plot(ages.ravel(), max_like, zorder = 4, color = 'k', linestyle = 'solid', lw = 2, label = 'Most likely ' + str(proxy)) if vertical: ax.plot(max_like, ages.ravel(), zorder = 4, color = 'k', linestyle = 'solid', lw = 2, label = 'Most likely ' + str(proxy)) xl = ax.get_xlim() yl = ax.get_ylim() if plot_constraints: i = 0 for section in sections: if i == 0: label = 'Age constraint' else: label = '_nolegend_' if vertical: ax.hlines(ages_df['age'][ages_df['section']==section], xl[0], xl[1], color = cs[section], linestyle = 'dashed', label = label, zorder = -1) ax.set_xlim(xl) if horizontal: ax.vlines(ages_df['age'][ages_df['section']==section], yl[0], yl[1], color = cs[section], linestyle = 'dashed', label = label, zorder = -1) ax.set_ylim(yl) i += 1 min_ages = np.append(min_ages, np.min(ages)) max_ages = np.append(max_ages, np.max(ages)) if vertical: ax.set_ylabel('Age (Ma)', fontsize = fs) if proxy == 'd13c': ax.set_xlabel(r'$\delta^{13}$C$_{\mathrm{carb}}$ (‰)', fontsize = fs) else: ax.set_xlabel(proxy, fontsize = fs) ax.set_ylim([np.min(min_ages) - 1, np.max(max_ages) + 1]) ax.invert_yaxis() if horizontal: ax.set_xlabel('Age (Ma)', fontsize = fs) if proxy == 'd13c': ax.set_ylabel(r'$\delta^{13}$C$_{\mathrm{carb}}$ (‰)', fontsize = fs) else: ax.set_ylabel(proxy, fontsize = fs) ax.set_xlim([np.min(min_ages) - 1, np.max(max_ages) + 1]) ax.invert_xaxis() ax.tick_params(bottom = True, top = True, left = True, right = True, direction = 'in', labelsize = fs, width = 1.5) # for axis in ['top','bottom','left','right']: # ax.spines[axis].set_linewidth(1.5) if legend: ax.legend(loc='best') return fig
[docs] def interpolated_proxy_inference(interpolated_df, interpolated_proxy_df, proxy, legend = True, plot_samples = False, plot_mle = True, orientation = 'horizontal', section_legend = False, marker_size = 20, section_cmap = 'Spectral', **kwargs): """ Plot interpolated proxy signal over time (by extending the posterior section age models to a new proxy not included in the inference model) using interpolated age models from :py:meth:`extend_age_model() <stratmc.inference.extend_age_model>` and interpolated proxy values from :py:meth:`interpolate_proxy() <stratmc.inference.interpolate_proxy>` in :py:mod:`stratmc.inference`. .. plot:: from stratmc.data import load_object, load_trace from stratmc.inference import extend_age_model, interpolate_proxy from stratmc.plotting import interpolated_proxy_inference full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' example_sample_path_d18o = 'examples/example_sample_df_d18o' example_ages_path = 'examples/example_ages_df' sample_df = load_object(example_sample_path) sample_df_d18o = load_object(example_sample_path_d18o) ages_df = load_object(example_ages_path) interpolated_df = extend_age_model(full_trace, sample_df, ages_df, ['d18o'], new_proxy_df = sample_df_d18o) ages_new = full_trace.X_new.X_new.values.ravel() interpolated_proxy_df = interpolate_proxy(interpolated_df, 'd18o', ages_new) interpolated_proxy_inference(interpolated_df, interpolated_proxy_df, 'd18o') plt.show() Parameters ---------- interpolated_df: pandas.DataFrame :class:`pandas.DataFrame` with interpolated age draws and sample age summary statistics from :py:meth:`extend_age_model() <stratmc.inference.extend_age_model>` in :py:mod:`stratmc.inference`. interpolated_proxy_df: pandas.DataFrame :class:`pandas.DataFrame` with interpolated proxy values and summary statistics at target ages from :py:meth:`interpolate_proxy() <stratmc.inference.interpolate_proxy>` in :py:mod:`stratmc.inference`. proxy: str Name of new proxy (must match column name in ``interpolated_proxy_df``). legend: bool, optional Generate a legend. Defaults to ``True``. plot_samples: bool, optional Plot proxy observations by most likely posterior age. Defaults to ``False``. plot_mle: bool, optional Plot the maximum likelihood estimate. Defaults to ``True``. orientation: str, optional Orientation of figure ('horizontal' or 'vertical'). Defaults to 'horizontal'. marker_size: int, optional Size of markers if ``plot_samples`` is ``True``. Defaults to 20. section_legend: bool, optional Include section names in the legend (if ``plot_samples`` is ``True``). Defaults to ``False``. section_cmap: str, optional Name of seaborn color palette to use for sections (if ``plot_samples`` is ``True``). Defaults to 'Spectral'. Returns ------- fig: matplotlib.pyplot.figure Figure with interpolated proxy signal over time. """ if 'sections' in kwargs: sections = list(kwargs['sections']) else: sections = np.unique(interpolated_df['section']) if orientation == 'horizontal': horizontal = True vertical = False if orientation == 'vertical': horizontal = False vertical = True if 'figsize' in kwargs: figsize = kwargs['figsize'] elif horizontal: figsize = (9, 5) elif vertical: figsize = (5, 9) if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 fig=plt.figure(figsize=figsize) ax = fig.gca() cs = {} pal = sns.color_palette(section_cmap,n_colors=len(sections)) for i in range(len(sections)): section = sections[i] cs[section] = pal[i] min_ages = [] max_ages = [] if plot_samples: # by max likelihood for section in sections: section_df = interpolated_df[interpolated_df['section']==section] if section_legend: label = section else: label = '_nolegend_' if vertical: ax.scatter(section_df[proxy], section_df['mle'], s = marker_size, color = cs[section], label = label, edgecolors = 'k', lw = 0.5, zorder = 3) if horizontal: ax.scatter(section_df['mle'], section_df[proxy], s = marker_size, color = cs[section], label = label, lw = 0.5, edgecolors = 'k', zorder = 3) sec_ages = section_df['mle'].values min_ages = np.append(min_ages, np.min(sec_ages)) max_ages = np.append(max_ages, np.max(sec_ages)) if vertical: ax.fill_betweenx(interpolated_proxy_df['age'], interpolated_proxy_df['97.5'], interpolated_proxy_df['2.5'], color=(.95,.95,.95), label='95% envelope', linestyle = '--', lw = 1.5, edgecolor = 'k', zorder = 0) if horizontal: ax.fill_between(interpolated_proxy_df['age'], interpolated_proxy_df['97.5'], interpolated_proxy_df['2.5'], color=(.95,.95,.95), label='95% envelope', linestyle = '--', edgecolor = 'k', lw = 1.5, zorder = 0) if vertical: ax.fill_betweenx(interpolated_proxy_df['age'], interpolated_proxy_df['84'], interpolated_proxy_df['16'], color=(.8,.8,.8), label='68% envelope', edgecolor = 'k', lw = 1.5, zorder = 1) if horizontal: ax.fill_between(interpolated_proxy_df['age'], interpolated_proxy_df['84'], interpolated_proxy_df['16'], color=(.8,.8,.8), label='68% envelope', edgecolor = 'k', lw = 1.5, zorder = 1) if plot_mle: if horizontal: ax.plot(interpolated_proxy_df['age'], interpolated_proxy_df['mle'], zorder = 2, color = 'k', linestyle = 'solid', lw = 2, label = 'Most likely ' + str(proxy)) if vertical: ax.plot(interpolated_proxy_df['mle'], interpolated_proxy_df['age'], zorder = 2, color = 'k', linestyle = 'solid', lw = 2, label = 'Most likely ' + str(proxy)) min_ages = np.append(min_ages, np.min(interpolated_proxy_df['age'])) max_ages = np.append(max_ages, np.max(interpolated_proxy_df['age'])) if vertical: ax.set_ylabel('Age (Ma)', fontsize = fs) ax.set_xlabel(proxy, fontsize = fs) ax.set_ylim([np.min(min_ages) - 1, np.max(max_ages) + 1]) ax.invert_yaxis() if horizontal: ax.set_xlabel('Age (Ma)', fontsize = fs) ax.set_ylabel(proxy, fontsize = fs) ax.set_xlim([np.min(min_ages) - 1, np.max(max_ages) + 1]) ax.invert_xaxis() ax.tick_params(bottom = True, top = True, left = True, right = True, direction = 'in', labelsize = fs, width = 1.5) if legend: ax.legend(loc='best') return fig
[docs] def age_height_model(sample_df, ages_df, full_trace, include_excluded_samples = True, plot_samples = True, cmap = 'Spectral', legend = False, **kwargs): """ Generate a posterior age-height plot for each section. .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import age_height_model full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' example_ages_path = 'examples/example_ages_df' sample_df = load_object(example_sample_path) ages_df = load_object(example_ages_path) age_height_model(sample_df, ages_df, full_trace) plt.show() Parameters ---------- sample_df: pandas.DataFrame :class:`pandas.DataFrame` containing proxy data for all sections. ages_df: pandas.DataFrame :class:`pandas.DataFrame` containing age constraints for all sections. full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. sections: list(str) or numpy.array(str), optional List of sections to plot. Defaults to all sections in ``sample_df``. cmap: str, optional Name of seaborn color palette to use for sections. Defaults to 'Spectral'. legend: bool, optional Generate a legend. Defaults to ``False``. include_excluded_samples: bool, optional Whether to consider excluded samples (``Exclude?`` is ``True`` in ``sample_df``) whose ages were passively tracked in the inference model. Defaults to ``True``. plot_samples: bool, optional Plot proxy observations by most likely posterior age. Defaults to ``True``. Returns ------- fig: matplotlib.pyplot.figure Figure with age-height models for each section. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'sections' in kwargs: sections = list(kwargs['sections']) else: sections = np.unique(sample_df.dropna(subset = proxies, how = 'all')['section']) sample_df, ages_df = clean_data(sample_df, ages_df, proxies, sections) cs = {} pal = sns.color_palette(cmap, n_colors=len(sections)) cols = 4 N = len(sections) rows = int(math.ceil(N/cols)) gs = gridspec.GridSpec(rows, cols) fig = plt.figure(figsize = (11, 2.5*rows)) for n in range(N): section = sections[n] cs[section] = pal[n] ax = fig.add_subplot(gs[n]) sec_ages = az.extract(full_trace.posterior)[str(section)+'_ages'].values # posterior_predictive section_df = sample_df[sample_df['section']==section] lo = np.percentile(sec_ages, 2.5, axis=1).flatten() hi = np.percentile(sec_ages, 97.5, axis=1).flatten() if include_excluded_samples: ax.fill_betweenx(section_df['height'], hi, lo, color=(.95,.95,.95), linestyle = 'dotted', label=r'95% envelope') else: included_idx = ~section_df['Exclude?'].values.astype(bool) ax.fill_betweenx(section_df['height'][included_idx], hi[included_idx], lo[included_idx], color=(.95,.95,.95), linestyle='dotted', label=r'95% envelope') lo = np.percentile(sec_ages,17,axis=1).flatten() hi = np.percentile(sec_ages,100-17,axis=1).flatten() if include_excluded_samples: ax.fill_betweenx(section_df['height'], hi, lo, color=(.8,.8,.8), linestyle='dashed', label=r'66% envelope') else: ax.fill_betweenx(section_df['height'][included_idx], hi[included_idx], lo[included_idx], color=(.8,.8,.8), linestyle='dashed', label=r'66% envelope') lo = np.percentile(sec_ages,33.5,axis=1).flatten() hi = np.percentile(sec_ages,100-33.5,axis=1).flatten() if include_excluded_samples: ax.fill_betweenx(section_df['height'], hi, lo, color=(.65,.65,.65), linestyle='solid', label=r'33% envelope') else: ax.fill_betweenx(section_df['height'][included_idx], hi[included_idx], lo[included_idx], color=(.65,.65,.65), linestyle='solid', label=r'33% envelope') if plot_samples: # calculate MLE max_like = np.zeros(sec_ages.shape[0]) for i in np.arange(sec_ages.shape[0]): sample_ages = sec_ages[i,:] dx = np.linspace(np.min(sample_ages), np.max(sample_ages), 1000) max_like[i] = dx[np.argmax(gaussian_kde(sample_ages, bw_method = 1)(dx))] if include_excluded_samples: ax.scatter(max_like, section_df['height'], color = cs[section], linewidth = 1, linestyle = 'solid', marker = 'x', s = 10, label = 'Most likely sample age') else: ax.scatter(max_like[included_idx], section_df['height'][included_idx], color = cs[section], linewidth = 1, linestyle = 'solid', marker = 'x', s = 10, label = 'Most likely sample age') ax.errorbar(ages_df['age'][ages_df['section']==section], ages_df['height'][ages_df['section']==section], xerr = 2*ages_df['age_std'][ages_df['section']==section], color = cs[section], fmt = 'none', capsize = 3) section_ages_df = ages_df[(ages_df['section']==section) & (~ages_df['Exclude?'])] ax.scatter(section_ages_df['age'][(~section_ages_df['intermediate intrusive?']) & (~section_ages_df['intermediate detrital?'])], section_ages_df['height'][(~section_ages_df['intermediate intrusive?']) & (~section_ages_df['intermediate detrital?'])], color = cs[section], lw = 0.5, edgecolor = 'k', label = r'Age ($\pm2\sigma$)') if section_ages_df['age'][section_ages_df['intermediate intrusive?']].shape[0] > 0: ax.scatter(section_ages_df['age'][section_ages_df['intermediate intrusive?']], section_ages_df['height'][section_ages_df['intermediate intrusive?']], marker = 's', color = cs[section], edgecolor = 'k', lw = 0.5, label = r'Intrusive Age ($\pm2\sigma$)') if section_ages_df['age'][section_ages_df['intermediate detrital?']].shape[0] > 0: ax.scatter(section_ages_df['age'][section_ages_df['intermediate detrital?']], section_ages_df['height'][section_ages_df['intermediate detrital?']], marker = '^', color = cs[section], edgecolor = 'k', lw = 0.5, label = r'Detrital Age ($\pm2\sigma$)') if legend: plt.legend(loc = 'upper right', bbox_to_anchor=(1, 1), markerfirst = False, frameon = False, fontsize = 8) ax.axis('tight') ax.set_xlabel('Age (Ma)', fontsize = fs) ax.set_ylabel('Height (m)', fontsize = fs) ax.tick_params(bottom = True, top = True, left = True, right = True, direction = 'in', labelsize = fs) ax.set_title(section, fontsize = fs) fig.tight_layout() plt.subplots_adjust(wspace=0.35) return fig
[docs] def section_proxy_signal(full_trace, sample_df, ages_df, include_radiometric_ages = False, plot_constraints = False, plot_mle = False, yax = 'height', legend = False, cmap = 'Spectral', **kwargs): """ Map the posterior proxy signal back to height in each section (using its most likely posterior age model), and plot alongside the proxy observations (plotted by most likely posterior age). .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import section_proxy_signal full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' example_ages_path = 'examples/example_ages_df' sample_df = load_object(example_sample_path) ages_df = load_object(example_ages_path) section_proxy_signal(full_trace, sample_df, ages_df) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. sample_df: pandas.DataFrame :class:`pandas.DataFrame` containing proxy data for all sections. ages_df: pandas.DataFrame :class:`pandas.DataFrame` containing age constraints for all sections. include_radiometric_ages: bool, optional Whether to consider radiometric ages in the posterior age model for each section. Defaults to ``False``. plot_constraints: bool, optional Plot age constraints for each section as dashed lines. Defaults to ``False``. plot_mle: bool, optional Plot the maximum likelihood estimate for the proxy signal as a line. Defaults to ``False``. yax: str, optional Scale for the y-axis ('height' or 'age'). Defaults to 'height'. legend: bool, optional Generate a legend. Defaults to ``True``. cmap: str, optional Name of seaborn color palette to use for sections. Defaults to 'Spectral'. proxy: str, optional Tracer to plot; only required if more than one proxy was included in the inference. Returns ------- fig: matplotlib.pyplot.figure Figure with inferred proxy signal mapped to height in each section. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'proxy' in kwargs: proxy = kwargs['proxy'] else: proxy = proxies[0] if 'sections' in kwargs: sections = list(kwargs['sections']) else: # only want to map signal back to sections that actually have data for this proxy sections = np.unique(sample_df.dropna(subset = [proxy], how = 'all')['section']) sample_df, ages_df = clean_data(sample_df, ages_df, proxies, sections) cs = {} pal = sns.color_palette(cmap, n_colors=len(sections)) N = len(sections) cols = np.min([N, 4]) rows = int(math.ceil(N/cols)) gs = gridspec.GridSpec(rows, cols) fig = plt.figure(figsize = (2.5 * cols, 4*rows)) proxy_pred = az.extract(full_trace.posterior_predictive)['f_pred_' + proxy].values ages_new = full_trace.X_new.X_new.values if plot_mle: # calculate MLE dy = np.linspace(np.min(proxy_pred), np.max(proxy_pred), 200) max_like = np.zeros(ages_new.size) for i in np.arange(ages_new.size): time_slice = proxy_pred[i,:] max_like[i] = dy[np.argmax(gaussian_kde(time_slice, bw_method = 1)(dy))] max_like = gaussian(max_like, 2) mapped_age_models = map_ages_to_section(full_trace, sample_df, ages_df, sections = sections, include_radiometric_ages = include_radiometric_ages) for n in range(N): section = sections[n] cs[section] = pal[n] ax = fig.add_subplot(gs[n]) ax.tick_params(bottom = True, top = False, left = True, right = False, direction = 'out', labelsize = fs, width = 1.5) ax.set_title(section, fontsize = fs) # calculate maximum likelihood age for each sample section_df = sample_df[sample_df['section']==section] proxy_idx = (~np.isnan(section_df[proxy])) & (~section_df['Exclude?'].values.astype(bool)) excluded_idx = (~np.isnan(section_df[proxy])) & (section_df['Exclude?']) section_age_model = mapped_age_models[mapped_age_models['section']==section] section_age_vec = section_age_model['age'].values above = (ages_new <= np.max(section_age_vec)) below = (ages_new >= np.min(section_age_vec)) age_idx = above & below # plot data if yax == 'height': # samples included in inference ax.scatter(section_df[proxy].values[proxy_idx], section_df['height'].values[proxy_idx], color = cs[section], edgecolor = 'k', zorder = 3) # excluded samples ax.scatter(section_df[proxy].values[excluded_idx], section_df['height'].values[excluded_idx], color = 'none', edgecolor = cs[section], zorder = 4) # plot posterior proxy signal hi = np.percentile(proxy_pred, 97.5, axis=1).flatten() lo = np.percentile(proxy_pred, 2.5, axis=1).flatten() ax.fill_betweenx(section_age_model['interpolated height'], hi[age_idx], lo[age_idx], color=(.95,.95,.95), label='95% envelope', linestyle = 'dotted', lw = 1.5, edgecolor = 'k', zorder = 0) hi = np.percentile(proxy_pred, 100-17, axis=1).flatten() lo = np.percentile(proxy_pred, 17, axis=1).flatten() ax.fill_betweenx(section_age_model['interpolated height'], hi[age_idx], lo[age_idx], color=(.8,.8,.8), label='66% envelope', linestyle = 'dashed', lw = 1.5, edgecolor = 'k', zorder = 1) hi = np.percentile(proxy_pred, 100-33.5, axis=1).flatten() lo = np.percentile(proxy_pred, 33.5, axis=1).flatten() ax.fill_betweenx(section_age_model['interpolated height'], hi[age_idx], lo[age_idx], color=(.65,.65,.65), label='33% envelope', linestyle = 'solid', lw = 1.5, edgecolor = 'k', zorder = 2) if plot_mle: ax.plot(max_like[age_idx], section_age_model['interpolated height'], color = 'k', lw = 2, label = 'Most likely', zorder = 3) if plot_constraints: section_ages_df = ages_df[(ages_df['section']==section) & (~ages_df['Exclude?'])] for h in section_ages_df['height']: ax.axhline(h, color = cs[section], linestyle = 'dashed', zorder = -1) if proxy == 'd13c': ax.set_xlabel(r'$\delta^{13}$C$_{\mathrm{carb}}$ (‰)', fontsize = fs) else: ax.set_xlabel(proxy, fontsize = fs) elif yax == 'age': # plot posterior proxy signal hi = np.percentile(proxy_pred, 97.5, axis=1).flatten() lo = np.percentile(proxy_pred, 2.5, axis=1).flatten() ax.fill_betweenx(ages_new[age_idx], hi[age_idx], lo[age_idx], color=(.95,.95,.95), label='95% envelope', linestyle = 'dotted', lw = 1.5, edgecolor = 'k', zorder = 0) hi = np.percentile(proxy_pred, 100-17, axis=1).flatten() lo = np.percentile(proxy_pred, 17, axis=1).flatten() ax.fill_betweenx(ages_new[age_idx], hi[age_idx], lo[age_idx], color=(.8,.8,.8), label='66% envelope', linestyle = 'dashed', lw = 1.5, edgecolor = 'k', zorder = 1) hi = np.percentile(proxy_pred, 100-33.5, axis=1).flatten() lo = np.percentile(proxy_pred, 33.5, axis=1).flatten() ax.fill_betweenx(ages_new[age_idx], hi[age_idx], lo[age_idx], color=(.65,.65,.65), label='33% envelope', linestyle = 'solid', lw = 1.5, edgecolor = 'k', zorder = 2) if plot_mle: ax.plot(max_like[age_idx], ages_new[age_idx], color = 'k', lw = 2, label = 'Most likely', zorder = 3) sec_ages = az.extract(full_trace.posterior)[str(section)+'_ages'].values max_like_samples = np.zeros(sec_ages.shape[0]) for i in np.arange(sec_ages.shape[0]): sample_ages = sec_ages[i,:] dx = np.linspace(np.min(sample_ages), np.max(sample_ages), 1000) max_like_samples[i] = dx[np.argmax(gaussian_kde(sample_ages, bw_method = 1)(dx))] ax.scatter(section_df[proxy].values[proxy_idx], max_like_samples[proxy_idx], color = cs[section], edgecolor = 'k', zorder = 4) ax.scatter(section_df[proxy].values[excluded_idx], max_like_samples[excluded_idx], color = 'none', edgecolor = cs[section], zorder = 5) if plot_constraints: section_ages_df = ages_df[(ages_df['section']==section) & (~ages_df['Exclude?'])] for age in section_ages_df['age']: ax.axhline(age, color = cs[section], linestyle = 'dashed', zorder = -1) ax.invert_yaxis() if proxy == 'd13c': ax.set_xlabel(r'$\delta^{13}$C$_{\mathrm{carb}}$ (‰)', fontsize = fs) else: ax.set_xlabel(proxy, fontsize = fs) if (n == 0) | ((n % 4) == 0): if yax == 'height': ax.set_ylabel('Height (m)', fontsize = fs) elif yax == 'age': ax.set_ylabel('Age (Ma)', fontsize = fs) if (n == 0) and legend: ax.legend() fig.tight_layout() return fig
[docs] def covariance_hyperparameters(full_trace, figsize = (4, 3.5), **kwargs): """ Plot prior and posterior distributions for the lengthscale (:math:`\\ell`) and variance (:math:`\\sigma`) hyperparameters of the :class:`pymc.gp.cov.ExpQuad <pymc.gp.cov.ExpQuad>` Gaussian process covariance kernel: .. math:: k(x, x') = \\sigma^2 \\mathrm{exp} \\left[-\\frac{(x - x')^2}{2 \\ell^2} \\right] .. plot:: from stratmc.data import load_trace from stratmc.plotting import covariance_hyperparameters full_trace = load_trace('examples/example_docs_trace') covariance_hyperparameters(full_trace) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. proxy: str, optional Tracer to plot model parameters for (each proxy has a different covariance kernel); only required if more than one proxy was included in the inference. Returns ------- fig: matplotlib.pyplot.figure Figure with prior and posterior model parameters. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'proxy' in kwargs: proxy = kwargs['proxy'] else: proxy = proxies[0] fig, ax = plt.subplots(2, figsize = figsize) sns.kdeplot(full_trace.prior['gp_ls_' + proxy].values.ravel(), color='indianred', label = 'Prior', ax = ax[0], cut = 0) sns.kdeplot(full_trace.posterior['gp_ls_' + proxy].values.ravel(), color = 'indianred', label = 'Posterior', fill = True, edgecolor = 'None', ax = ax[0], cut = 0) ax[0].set_xlabel('Lengthscale', fontsize=fs) ax[0].ticklabel_format(axis="x", style="sci", scilimits=(0,0)) ax[0].tick_params(labelsize=fs) ax[0].set_ylabel("") ax[0].set_yticks([]) ax[0].ticklabel_format(style='plain') ax[0].legend() sns.kdeplot(full_trace.prior['gp_var_' + proxy].values.ravel(), color='#87BED5', label = 'prior', ax = ax[1], cut = 0) sns.kdeplot(full_trace.posterior['gp_var_' + proxy].values.ravel(), color = '#87BED5', label = 'posterior', fill = True, edgecolor = 'None', ax = ax[1], cut = 0) ax[1].set_xlabel('Variance', fontsize=fs) ax[1].ticklabel_format(axis="x", style="sci", scilimits=(0,0)) ax[1].tick_params(labelsize=fs) ax[1].set_ylabel("") ax[1].set_yticks([]) ax[1].ticklabel_format(style='plain') ax[1].legend() fig.tight_layout() return fig
[docs] def section_summary(sample_df, ages_df, full_trace, section, plot_excluded_samples = False, plot_noise_prior = False, plot_offset_prior = False, include_age_constraints_sedrate = True, figsize = (8, 9)): """ For a given section, plot posterior estimates of sample age, sedimentation rate, noise, and offset. Noise and offset terms must be either per-section or global; to plot per-sample noise and offset terms, use :py:meth:`noise_summary() <stratmc.plotting>` and :py:meth:`offset_summary() <stratmc.plotting>`. .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import section_summary full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' example_ages_path = 'examples/example_ages_df' sample_df = load_object(example_sample_path) ages_df = load_object(example_ages_path) section_summary(sample_df, ages_df, full_trace, '1') plt.show() Parameters ---------- sample_df: pandas.DataFrame :class:`pandas.DataFrame` containing proxy data for all sections. ages_df: pandas.DataFrame :class:`pandas.DataFrame` containing age constraints for all sections. full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. section: str Name of target section. plot_excluded_samples: bool, optional Plot age estimates for proxy observations that were excluded from the inference (``Exclude?`` is ``True`` in ``sample_df``). Defaults to ``False``. plot_noise_prior: bool, optional Plot prior distribution for noise term. Defaults to ``False``. plot_offset_prior: bool, optional Plot prior distribution for offset term. Defaults to ``False``. include_age_constraints_sedrate: bool, optional Include age constraints in sedimentation rate calculations. Defaults to ``True``. Returns ------- fig: matplotlib.pyplot.figure Figure summarizing posterior sample ages, sedimentation rate, and posterior noise and offset terms for the input section. """ # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if str(section) + '_section_noise_' + proxies[0] in list(full_trace.posterior.keys()): noise_type = 'section' else: noise_type = 'groups' if str(section) + '_section_offset_' + proxies[0] in list(full_trace.posterior.keys()): offset_type = 'section' else: group_offset_vars = [ l for l in list(full_trace["posterior"].data_vars.keys()) if f"{'_group_offset_'}" in l ] if len(group_offset_vars) > 0: offset_type = 'groups' else: offset_type = 'none' if (noise_type == 'none') and (offset_type == 'none'): n_rows = 3 elif ((noise_type != 'none') or (offset_type != 'none')) and not ((noise_type != 'none') and (offset_type != 'none')): n_rows = 4 elif ((noise_type != 'none') and (offset_type != 'none')): n_rows = 5 sample_df, ages_df = clean_data(sample_df, ages_df, proxies, [section]) fig, ax = plt.subplots(n_rows, 1, figsize = figsize, constrained_layout=True, gridspec_kw={'height_ratios': [1, 1, 1.5] + [0.5] * (n_rows - 3)}) # link axes with age on the x-axis ax[0].sharex(ax[1]) ax[2].sharex(ax[1]) pal = sns.color_palette("flare", n_colors = np.mean(az.extract(full_trace.posterior)[str(section)+'_ages'].values, axis = 1).size, desat=0.8) sns.set_palette(pal) samples = np.mean(az.extract(full_trace.posterior)[str(section)+'_ages'], axis = 1).size # posterior_predictive sec_ages = az.extract(full_trace.posterior)[str(section)+'_ages'].values # posterior_predictive section_df = sample_df[sample_df['section']==section] excluded_idx = section_df['Exclude?'].values for i in range(samples): if not excluded_idx[i]: sns.kdeplot(sec_ages[i,:].ravel().astype(float), label = 'posterior', fill = True, edgecolor = None, color = pal[i], alpha = 0.3, ax = ax[0], cut = 0) else: if plot_excluded_samples: sns.kdeplot(sec_ages[i,:].ravel().astype(float), label = 'posterior', fill = True, edgecolor = None, color = pal[i], alpha = 0.3, ax = ax[0], cut = 0) yl = ax[0].get_ylim() ax[0].vlines(ages_df['age'][ages_df['section']==section], yl[0], yl[1], color = '#8c8684', linestyle = 'dashed', zorder = 3, label = 'Age constraint') for i in range(len(ages_df['age'][ages_df['section']==section])): ax[0].axvspan(ages_df['age'][ages_df['section']==section].values[i] - ages_df['age_std'][ages_df['section']==section].values[i], ages_df['age'][ages_df['section']==section].values[i] + ages_df['age_std'][ages_df['section']==section].values[i], alpha=0.5, color ='#8c8684') ax[0].invert_xaxis() ax[0].set_ylim(yl) ax[0].set_ylabel('Sample ages', fontsize = 12) ax[0].set_yticks([]) # age constraint posteriors constraint_ages = az.extract(full_trace.posterior)[str(section)+'_radiometric_age'].values prior_constraint_ages = az.extract(full_trace.prior)[str(section)+'_radiometric_age'].values samples = np.mean(constraint_ages, axis = 1).size pal = sns.color_palette('crest',n_colors=samples) for i in range(samples): if i == 0: post_label = 'Posterior' prior_label = 'Prior' else: post_label = '_nolegend' prior_label = '_nolegend' sns.kdeplot(constraint_ages[i,:].ravel(), label = post_label, fill = True, color = pal[i], edgecolor='None', alpha = 0.3, ax = ax[1], cut = 0) sns.kdeplot(prior_constraint_ages[i,:].ravel(), label = prior_label, color = pal[i], fill = False, ax = ax[1], cut = 0) ax[1].set_ylabel('Age constraints', fontsize = 12) ax[1].set_yticks([]) ax[1].legend() # sedimentation rate age_bins = 50 rate_bins = 50 rate_df = accumulation_rate(full_trace, sample_df, ages_df, sections = section, method = 'successive', include_age_constraints = include_age_constraints_sedrate) age_bin_edges = np.linspace(np.min(rate_df['top_age']), np.max(rate_df['base_age']), age_bins) rate_bin_edges = np.logspace(np.log10(rate_df['rate'].min()), np.log10(rate_df['rate'].max()), rate_bins) # get age bin centers age_centers = (age_bin_edges[:-1] + age_bin_edges[1:]) / 2 age_vec = [] rate_vec = [] for rate, base, top in zip(rate_df['rate'], rate_df['base_age'], rate_df['top_age']): for i in np.arange(len(age_bin_edges)-1): center = age_centers[i] current_bin = pd.Interval(age_bin_edges[i], age_bin_edges[i+1], closed = 'left') current_sample_pair = pd.Interval(top, base, closed = 'right') result = current_bin.overlaps(current_sample_pair) if result: age_vec.append(center) rate_vec.append(rate) H, xedges, yedges = np.histogram2d(age_vec, rate_vec, bins=[age_bin_edges, rate_bin_edges], density = False) H[H == 0] = np.nan H = H/np.nansum(H, axis=1,keepdims=1) # each column (age bin) should sum to 1 m = ax[2].pcolormesh(xedges, yedges, H.T, cmap = 'jet') ax[2].set_yscale('log') ax[2].set_ylabel('LOG$_{10}$ (Accumulation rate [m/Myr])', fontsize = 12) ax[2].set_xlabel('Age (Ma)', fontsize = 12) cb = plt.colorbar(m) cb.set_label(label='Probability density', fontsize = 12) # noise posteriors if noise_type != 'none': if noise_type == 'section': pal = sns.color_palette("deep", n_colors = len(proxies)) if noise_type == 'groups': palettes = ['crest', 'flare', 'Purples', 'Grays', 'Reds', 'Greens', 'YlOrBr', 'Oranges'] for i in np.arange(len(proxies)): proxy = proxies[i] if noise_type == 'section': posterior_noise_dist = az.extract(full_trace.posterior)[str(section) + '_section_noise_' + proxy].values.ravel() prior_noise_dist = az.extract(full_trace.prior)[str(section) + '_section_noise_' + proxy].values.ravel() sns.kdeplot(posterior_noise_dist, fill = True, ax = ax[3], edgecolor='none', color = pal[i], alpha = 0.3, label = proxy + ' posterior', cut = 0) if plot_noise_prior: sns.kdeplot(prior_noise_dist, fill = False, ax = ax[3], color = pal[i], alpha = 0.3, label = proxy + ' prior', cut = 0) elif noise_type == 'groups': noise_vars = [ l for l in list(full_trace["posterior"].data_vars.keys()) if f"{'_group_noise_' + str(proxy)}" in l ] pal = sns.color_palette(palettes[i], n_colors = len(noise_vars)) for j, var in enumerate(noise_vars): posterior_noise_dist = az.extract(full_trace.posterior)[var].values.ravel() sns.kdeplot(posterior_noise_dist, fill = True, ax = ax[3], color = pal[j], alpha = 0.3, label = var + ' posterior', cut = 0 ) if plot_noise_prior: prior_noise_dist = az.extract(full_trace.prior)[var].values.ravel() sns.kdeplot(prior_noise_dist, fill = False, ax = ax[3], color = pal[j], alpha = 0.3, label = var + ' prior', cut = 0 ) if (len(proxies) > 1) | (noise_type == 'groups'): ax[3].legend() ax[3].set_xlabel('Noise', fontsize = 12) ax[3].set_yticks([]) ax[3].set_ylabel(None) # offset posteriors if offset_type != 'none': if offset_type == 'section': pal = sns.color_palette("deep", n_colors = len(proxies)) if offset_type == 'groups': palettes = ['crest', 'flare', 'Purples', 'Grays', 'Reds', 'Greens', 'YlOrBr', 'Oranges'] for i in np.arange(len(proxies)): proxy = proxies[i] if offset_type == 'section': posterior_offset_dist = az.extract(full_trace.posterior)[str(section) + '_section_offset_' + proxy].values.ravel() sns.kdeplot(posterior_offset_dist, fill = True, ax = ax[n_rows - 1], edgecolor='none', color = pal[i], alpha = 0.3, label = proxy + ' posterior', cut = 0) if plot_offset_prior: prior_offset_dist = az.extract(full_trace.prior)[str(section) + '_section_offset_' + proxy].values.ravel() sns.kdeplot(prior_offset_dist, fill = False, ax = ax[n_rows - 1], color = pal[i], alpha = 0.3, label = proxy + ' prior', cut = 0) elif offset_type == 'groups': offset_vars = [ l for l in list(full_trace["posterior"].data_vars.keys()) if f"{'_group_offset_' + str(proxy)}" in l ] pal = sns.color_palette(palettes[i], n_colors = len(offset_vars)) for j, var in enumerate(offset_vars): posterior_offset_dist = az.extract(full_trace.posterior)[var].values.ravel() sns.kdeplot(posterior_offset_dist, fill = True, ax = ax[n_rows - 1], edgecolor='none', color = pal[j], alpha = 0.3, label = var + ' posterior', cut = 0) if plot_offset_prior: prior_offset_dist = az.extract(full_trace.prior)[var].values.ravel() sns.kdeplot(prior_offset_dist, fill = False, ax = ax[n_rows - 1], color = pal[j], alpha = 0.3, label = var + ' prior', cut = 0) if (len(proxies) > 1) | (offset_type == 'groups'): ax[n_rows - 1].legend() ax[n_rows - 1].set_xlabel('Offset', fontsize = 12) ax[n_rows - 1].set_yticks([]) ax[n_rows - 1].set_ylabel(None) plt.setp(ax[0].get_xticklabels(), visible=False) plt.setp(ax[1].get_xticklabels(), visible=False) for axis in ax.ravel(): axis.tick_params(bottom = True, top = False, left = False, right = False, direction = 'in', labelsize = 12) fig.suptitle('Section: ' + str(section), fontsize = 14) return fig
[docs] def noise_summary(full_trace, **kwargs): """ Plot posterior noise distributions for each section or group of samples (depending on ``noise_type`` used in :py:meth:`build_model() <stratmc.model.build_model>`) for a given proxy. If multiple proxies were included in the inference, pass a ``proxy`` argument to specify which noise terms to plot. .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import noise_summary full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' sample_df = load_object(example_sample_path) sections = np.unique(sample_df['section'].values) noise_summary(full_trace) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. proxy: str, optional Target proxy; only required if more than one proxy was included in the inference. Returns ------- fig: matplotlib.pyplot.figure Figure with noise distributions for each section or group of samples. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'proxy' in kwargs: proxy = kwargs['proxy'] else: proxy = proxies[0] section_noise_vars = [ l for l in list(full_trace["posterior"].data_vars.keys()) if f"{'_section_noise_' + str(proxy)}" in l ] if len(section_noise_vars) > 0: noise_vars = section_noise_vars else: noise_vars = [ l for l in list(full_trace["posterior"].data_vars.keys()) if f"{'_group_noise_' + str(proxy)}" in l ] if len(noise_vars) == 0: sys.exit(f"No noise terms in the model") N = len(noise_vars) cols = int(np.min([4, N])) pal = sns.color_palette("deep", n_colors = len(noise_vars)) rows = int(math.ceil(N/cols)) gs = gridspec.GridSpec(rows, cols) fig = plt.figure(figsize = (10, 2.5*rows)) for n in range(N): ax = fig.add_subplot(gs[n]) var = noise_vars[n] ax.set_title(str(var), fontsize = fs) posterior_noise_dist = full_trace.posterior[var].values.ravel() sns.kdeplot(posterior_noise_dist, ax = ax, fill = True, edgecolor='None', color = pal[n], alpha = 0.5, label = proxy, cut = 0) ax.set_ylabel('') ax.set_yticks([]) ax.tick_params(labelsize = fs) fig.supxlabel('Noise (%s)' % proxy, fontsize = fs) fig.tight_layout() plt.subplots_adjust(wspace=0.15, hspace=0.4) return fig
[docs] def offset_summary(full_trace, **kwargs): """ Plot posterior offset distributions for each section or group of samples (depending on ``offset_type`` used in :py:meth:`build_model() <stratmc.model.build_model>`). If multiple proxies were included in the inference, pass a ``proxy`` argument to specify which offset terms to plot. .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import offset_summary full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' sample_df = load_object(example_sample_path) offset_summary(full_trace) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. proxy: str, optional Target proxy; only required if more than one proxy was included in the inference. Returns ------- fig: matplotlib.pyplot.figure Figure with offset distributions for each section or group of samples. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'proxy' in kwargs: proxy = kwargs['proxy'] else: proxy = proxies[0] section_offset_vars = [ l for l in list(full_trace["posterior"].data_vars.keys()) if f"{'_section_offset_' + str(proxy)}" in l ] if len(section_offset_vars) > 0: offset_vars = section_offset_vars else: offset_vars = [ l for l in list(full_trace["posterior"].data_vars.keys()) if f"{'_group_offset_' + str(proxy)}" in l ] if len(offset_vars) == 0: sys.exit(f"No offset terms in the model") N = len(offset_vars) cols = int(np.min([4, N])) pal = sns.color_palette("deep", n_colors = len(offset_vars)) rows = int(math.ceil(N/cols)) gs = gridspec.GridSpec(rows, cols) fig = plt.figure(figsize = (10, 2.5*rows)) for n in range(N): ax = fig.add_subplot(gs[n]) var = offset_vars[n] ax.set_title(var, fontsize = fs) # shape = samples x draws posterior_offset_dist = az.extract(full_trace.posterior)[var].values.ravel() sns.kdeplot(posterior_offset_dist, ax = ax, fill = True, edgecolor = 'None', color = pal[n], alpha = 0.3, label = var, cut = 0 ) ax.set_ylabel('') ax.set_yticks([]) ax.tick_params(labelsize = fs) fig.supxlabel('Offset (%s)' % proxy, fontsize = fs) fig.tight_layout() plt.subplots_adjust(wspace=0.15, hspace=0.4) return fig
[docs] def section_proxy_residuals(full_trace, sample_df, legend = True, cmap = 'Spectral', plot_mle = False, include_excluded_samples = False, **kwargs): """ Plot the residuals between the observed proxy values for each section and the inferred proxy signal (using the posterior section age models to map the signal back to height in section). Use to check for stratigraphic trends in the residuals, which may give insight to the processes that cause noisy sections to deviate from the inferred common signal. If multiple proxies were included in the inference, pass a ``proxy`` argument. .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import section_proxy_residuals full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' sample_df = load_object(example_sample_path) section_proxy_residuals(full_trace, sample_df) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. sample_df: pandas.DataFrame :class:`pandas.DataFrame` containing proxy data for all sections. legend: bool, optional Generate a legend. Defaults to ``True``. cmap: str, optional Name of seaborn color palette to use for sections. Defaults to 'Spectral'. plot_mle: bool, optional Plot the maximum likelihood estimate as a line. Defaults to ``False``. proxy: str, optional Target proxy; only required if more than one proxy was included in the inference. include_excluded_samples: bool, optional Whether to plot the residuals for samples that were excluded from the inference (``Exclude?`` is ``True`` in ``sample_df``). Defaults to ``False``. Returns ------- fig: matplotlib.pyplot.figure Figure with residuals for each section. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'proxy' in kwargs: proxy = kwargs['proxy'] else: proxy = proxies[0] if 'sections' in kwargs: sections = list(kwargs['sections']) else: sections = np.unique(sample_df[~np.isnan(sample_df[proxy])]['section']) cs = {} pal = sns.color_palette(cmap, n_colors=len(sections)) N = len(sections) cols = np.min([N, 4]) rows = int(math.ceil(N/cols)) gs = gridspec.GridSpec(rows, cols) fig = plt.figure(figsize = (2.5 * cols, 4*rows)) proxy_pred = az.extract(full_trace.posterior_predictive)['f_pred_' + proxy].values ages_new = full_trace.X_new.X_new.values for n in range(N): section = sections[n] cs[section] = pal[n] ax = fig.add_subplot(gs[n]) ax.set_title(section, fontsize = fs) section_sample_df = sample_df[sample_df['section'] == section] if include_excluded_samples: included_idx = (~np.isnan(section_sample_df[proxy])) else: included_idx = (~np.isnan(section_sample_df[proxy])) & (~section_sample_df['Exclude?']) section_ages = az.extract(full_trace.posterior)[str(section) + '_ages'].values[included_idx, :] section_proxy_true = section_sample_df[included_idx][proxy].values section_heights = section_sample_df[included_idx]['height'].values section_proxy_pred = np.ones((len(section_proxy_true), proxy_pred.shape[1])) * np.nan section_residuals = np.ones((len(section_proxy_true), proxy_pred.shape[1])) * np.nan for i in np.arange(proxy_pred.shape[1]): section_proxy_pred[:, i] = np.interp(section_ages[:, i], ages_new, proxy_pred[:, i]) # residual = actual - predicted section_residuals[:, i] = section_proxy_true - section_proxy_pred[:, i] hi = np.percentile(section_residuals, 97.5, axis = 1).flatten() lo = np.percentile(section_residuals, 2.5, axis = 1).flatten() ax.fill_betweenx(section_heights, hi, lo, color=(.95,.95,.95), label='95% envelope', linestyle = 'dotted', lw = 1.5, edgecolor = 'k', zorder = 0) hi = np.percentile(section_residuals, 100-17, axis=1).flatten() lo = np.percentile(section_residuals, 17, axis=1).flatten() ax.fill_betweenx(section_heights, hi, lo, color=(.8,.8,.8), label='66% envelope', linestyle = 'dashed', lw = 1.5, edgecolor = 'k', zorder = 1) hi = np.percentile(section_residuals, 100-33.5, axis=1).flatten() lo = np.percentile(section_residuals, 33.5, axis=1).flatten() ax.fill_betweenx(section_heights, hi, lo, color=(.65,.65,.65), label='33% envelope', linestyle = 'solid', lw = 1.5, edgecolor = 'k', zorder = 2) if plot_mle: # calculate mle dy = np.linspace(np.min(section_residuals.ravel()), np.max(section_residuals.ravel()), 200) max_like = np.zeros(section_heights.size) for i in np.arange(section_heights.size): height_slice = section_residuals[i,:] max_like[i] = dy[np.argmax(gaussian_kde(height_slice, bw_method = 1)(dy))] ax.plot(max_like, section_heights, color = 'k', lw = 2, zorder = 3, label = 'Most likely') ax.set_ylabel('Height (m)', fontsize = fs) if legend and n == 0: ax.legend(loc = 'upper right') fig.supxlabel('Residual ' + str(proxy) + ' value (observed - predicted)', fontsize = fs) fig.tight_layout() return fig
[docs] def sample_ages(full_trace, sample_df, section, plot_excluded_samples = False, plot_prior = True, cmap = 'viridis'): """ Plot sample age prior and posterior distributions for a given section. Each subplot contains posterior distributions for a different sample. .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import sample_ages full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' sample_df = load_object(example_sample_path) section = '1' sample_ages(full_trace, sample_df, section) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of posterior and prior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. sample_df: pandas.DataFrame :class:`pandas.DataFrame` containing proxy data for all sections. section: str Name of target section. plot_excluded_samples: bool, optional Plot age distributions for proxy observations that were excluded from the inference (``Exclude?`` is ``True`` in ``sample_df``). Defaults to ``False``. plot_prior: bool, optional Plot prior distributions for sample ages. Defaults to ``True``. cmap: str, optional Name of seaborn color palette to use for age distributions. Defaults to 'viridis'. Returns ------- fig: matplotlib.pyplot.figure Figure with prior and posterior sample age distributions. """ # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) sample_df, _ = clean_data(sample_df, None, proxies, [section]) vals = az.extract(full_trace.posterior)[str(section)+'_ages'].values prior_vals = az.extract(full_trace.prior)[str(section)+'_ages'].values print(vals.shape) print(prior_vals.shape) if not plot_excluded_samples: included_idx = ~sample_df['Exclude?'].values.astype(bool) # shape = (samples x draws) vals = vals[included_idx, :] prior_vals = prior_vals[included_idx, :] else: excluded_idx = sample_df['Exclude?'].values samples = np.mean(vals, axis = 1).size cols = 5 N = samples rows = int(math.ceil(N/cols)) pal = sns.color_palette(cmap,n_colors=samples) gs = gridspec.GridSpec(rows, cols) fig = plt.figure(figsize = (11, 1.5*rows)) excluded_count = 0 for i in range(samples): ax = fig.add_subplot(gs[i]) if plot_excluded_samples: if excluded_idx[i]: linestyle = 'dashed' edgecolor = pal[i] post_label = 'Posterior (excluded sample)' prior_label = 'Prior (excluded sample)' excluded_count += 1 else: linestyle = 'solid' edgecolor = 'none' post_label = 'Posterior' prior_label = 'Prior' else: linestyle = 'solid' edgecolor = 'none' post_label = 'Posterior' prior_label = 'Prior' sns.kdeplot(vals[i,:].ravel(), label = post_label, fill = True, color = pal[i], edgecolor=edgecolor, linestyle = linestyle, alpha = 0.3, ax = ax, cut = 0) if plot_prior: sns.kdeplot(prior_vals[i,:].ravel(), label = prior_label, fill = False, color = pal[i], ax = ax, cut = 0) ax.tick_params(bottom = True, top = True, left = False, right = False, direction = 'in', labelsize = 12) ax.set_ylabel('') ax.set_yticklabels([]) ax.invert_xaxis() if (i == 0) | ((excluded_count == 1) and (excluded_idx[i])): ax.legend(frameon = False, loc = 'upper left') fig.suptitle('Section: ' + section, fontsize = 14) fig.supxlabel('Age (Ma)', fontsize = 12) fig.tight_layout() return fig
[docs] def sample_ages_per_chain(full_trace, sample_df, section, chains = None, plot_prior = False, plot_excluded_samples = False, legend = True, cmap = 'viridis'): """ Plot sample age posterior distributions for a given section, with separate distributions for each chain. Each subplot contains posterior distributions for a different sample. Use to check for posterior multimodality (in this example, each chain has explored a different mode of the posterior age distributions). .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import sample_ages_per_chain full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' sample_df = load_object(example_sample_path) section = '1' sample_ages_per_chain(full_trace, sample_df, section, chains = [0, 1, 2, 3]) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. sample_df: pandas.DataFrame :class:`pandas.DataFrame` containing proxy data for all sections. section: str Name of target section. chains: list(int) or numpy.array(int); optional Indices of chains to include; optional (defaults to all chains). plot_prior: bool, optional Plot prior distributions for sample ages. Defaults to ``False``. plot_excluded_samples: bool, optional Plot age distributions for proxy observations that were excluded from the inference (``Exclude?`` is ``True`` in ``sample_df``). Defaults to ``False``. legend: bool, optional Generate a legend. Defaults to ``True``. cmap: str, optional Name of seaborn color palette to use for different chains. Defaults to 'viridis'. Returns ------- fig: matplotlib.pyplot.figure Figure with per-chain posterior sample age distributions. """ # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) sample_df, _ = clean_data(sample_df, None, proxies, [section]) # chains x draws x samples vals = full_trace.posterior[str(section)+'_ages'].values if chains is None: chains = np.arange(vals.shape[0]) prior_vals = full_trace.prior[str(section)+'_ages'].values if not plot_excluded_samples: included_idx = ~sample_df['Exclude?'].values.astype(bool) # shape = (samples x draws) vals = vals[:, :, included_idx] prior_vals = prior_vals[:, :, included_idx] excluded_idx = np.array([False] * vals.shape[2]) else: excluded_idx = sample_df['Exclude?'].values samples = vals.shape[2] cols = 5 N = samples rows = int(math.ceil(N/cols)) cs = {} pal = sns.color_palette(cmap, n_colors=len(chains)) gs = gridspec.GridSpec(rows, cols) fig = plt.figure(figsize = (11, 1.5*rows)) excluded_count = 0 for i in range(samples): ax = fig.add_subplot(gs[i]) if excluded_idx[i]: linestyle = 'dashed' post_label = 'Posterior (excluded sample)' prior_label = 'Prior (excluded sample)' excluded_count += 1 lw = 1 else: lw = 0 linestyle = 'solid' post_label = 'Posterior' prior_label = 'Prior' for m in range(len(chains)): cs[m] = pal[m] sns.kdeplot(vals[chains[m], :, i].ravel(), label = post_label + ', chain ' + str(chains[m]), fill = True, color = cs[m], alpha = 0.3, ax = ax, lw = lw, linestyle = linestyle, cut = 0) # assumes only 1 prior chain if plot_prior: sns.kdeplot(prior_vals[:, :, i].ravel(), label = prior_label, fill = False, color = 'k', ax = ax, cut = 0) ax.tick_params(bottom = True, top = True, left = False, right = False, direction = 'in', labelsize = 12) ax.set_ylabel('') ax.set_yticklabels([]) ax.invert_xaxis() if legend: if (i == 0) | ((excluded_count == 1) and (excluded_idx[i])): ax.legend(frameon = False, loc = 'upper left') fig.suptitle('Section: ' + section, fontsize = 14) fig.supxlabel('Age (Ma)', fontsize = 12) fig.tight_layout() return fig
[docs] def age_constraints(full_trace, section, cmap = 'viridis', **kwargs): """ For a given section, plot prior and posterior age distributions for all depositional age constraints (and limiting age constraints that provide a minimum or maximum age for the entire section). To plot intermediate limiting ages (i.e., detrital and intrusive age constraints in the middle of a section), use :py:meth:`limiting_age_constraints() <stratmc.plotting.limiting_age_constraints>`. .. plot:: from stratmc.data import load_trace from stratmc.plotting import age_constraints full_trace = load_trace('examples/example_docs_trace') section = '1' age_constraints(full_trace, section) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. section: str Name of target section. cmap: str, optional Name of seaborn color palette to use for age distributions. Defaults to 'viridis'. Returns ------- fig: matplotlib.pyplot.figure Figure with prior and posterior depositional age constraint distributions. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 vals = az.extract(full_trace.posterior)[str(section)+'_radiometric_age'].values prior_vals = az.extract(full_trace.prior)[str(section)+'_radiometric_age'].values # if only 1 age constraint (should only occur if one the min or max age is shared), reshape for plotting # (this should actually never happen -- even if shared, the age constraints end up in the section_radiometric_age RV in the model) if vals.ndim == 1: vals = np.reshape(vals, (1, len(vals))) if prior_vals.ndim == 1: prior_vals = np.reshape(prior_vals, (1, len(prior_vals))) samples = np.mean(vals, axis = 1).size cols = np.min([4, samples]) N = samples rows = int(math.ceil(N/cols)) pal = sns.color_palette(cmap, n_colors=samples) gs = gridspec.GridSpec(rows, cols) fig = plt.figure(figsize = (11, 2*rows)) for i in range(samples): ax = fig.add_subplot(gs[i]) sns.kdeplot(vals[i,:].ravel(), label = 'Posterior', fill = True, color = pal[i], edgecolor='None', alpha = 0.3, ax = ax, cut = 0) sns.kdeplot(prior_vals[i,:].ravel(), label = 'Prior', fill = False, color = pal[i], ax = ax, cut = 0) ax.set_ylabel('') ax.set_yticklabels([]) ax.invert_xaxis() ax.tick_params(bottom = True, top = True, left = False, right = False, direction = 'in', labelsize = fs) if i == 0: ax.legend(frameon = False, loc = 'upper left') fig.suptitle('Section: ' + section, fontsize = fs + 2) fig.supxlabel('Age (Ma)', fontsize = fs) fig.tight_layout() return fig
[docs] def limiting_age_constraints(full_trace, sample_df, ages_df, section, cmap = 'viridis', **kwargs): """ For a given section, plot prior and posterior age distributions for all intermediate limiting (i.e., detrital and intrusive ages in the middle of a section) age constraints. To plot depositional age constraints (and limiting age constraints that provide a minimum or maximum age for the entire section), use :py:meth:`age_constraints() <stratmc.plotting.age_constraints>`. .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import limiting_age_constraints full_trace = load_trace('examples/example_docs_limiting_ages_trace') example_sample_path = 'examples/example_sample_df_limiting_ages' example_ages_path = 'examples/example_ages_df_limiting_ages' sample_df = load_object(example_sample_path) ages_df = load_object(example_ages_path) section = '1' limiting_age_constraints(full_trace, sample_df, ages_df, section) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. sample_df: pandas.DataFrame :class:`pandas.DataFrame` containing proxy data for all sections. ages_df: pandas.DataFrame :class:`pandas.DataFrame` containing age constraints for all sections. section: str Name of target section. cmap: str, optional Name of seaborn color palette to use for age distributions. Defaults to 'viridis'. Returns ------- fig: matplotlib.pyplot.figure Figure with prior and posterior limiting age constraint distributions. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) sample_df, ages_df = clean_data(sample_df, ages_df, proxies, [section]) detrital_df = ages_df[(ages_df['section']==section) & (ages_df['intermediate detrital?'])] intrusive_df = ages_df[(ages_df['section']==section) & (ages_df['intermediate intrusive?'])] detrital_named = [] if detrital_df.shape[0] > 0: # grab names from list of model variables (will only include constriants that aren't shared) detrital_vars = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (l[0:len(section)] == section) and (f"{'detrital_age_'}" in l) ] detrital_named += [False] * len(detrital_vars) # if constraints are shared, grab those names too detrital_vars += list(detrital_df[detrital_df['shared?']]['name'].values) detrital_named += [True] * len(list(detrital_df[detrital_df['shared?']]['name'].values)) else: detrital_vars = [] intrusive_named = [] if intrusive_df.shape[0] > 0: # grab names from list of model variables (will only include constriants that aren't shared) intrusive_vars = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (l[0:len(section)] == section) and (f"{'intrusive_age_'}" in l) ] intrusive_named += [False] * len(intrusive_vars) # if constraints are shared, grab those names too intrusive_vars += list(intrusive_df[intrusive_df['shared?']]['name'].values) intrusive_named += [True] * len(list(intrusive_df[intrusive_df['shared?']]['name'].values)) else: intrusive_vars = [] # number of age constraints to plot combined_vars = detrital_vars + intrusive_vars combined_named = detrital_named + intrusive_named if len(combined_vars) == 0: sys.exit(f"Section {section} has no intermediate limiting age constraints") N = len(combined_vars) cols = np.min([4, N]) rows = int(math.ceil(N/cols)) pal = sns.color_palette(cmap, n_colors=N) gs = gridspec.GridSpec(rows, cols) fig = plt.figure(figsize = (11, 2*rows)) for i in range(N): var = combined_vars[i] named = combined_named[i] if named: title = var elif i < len(detrital_vars): title = 'Detrital ' + str(i) else: title = 'Intrusive ' + str(i - len(detrital_vars)) ax = fig.add_subplot(gs[i]) ax.set_title(title, fontsize = fs) # distributions for detrital and intrsive ages are always 1d (different objects created for each constraint) vals = az.extract(full_trace.posterior)[var].values prior_vals = az.extract(full_trace.prior)[var].values sns.kdeplot(vals.ravel(), label = 'Posterior', fill = True, color = pal[i], edgecolor='None', alpha = 0.3, ax = ax, cut = 0) sns.kdeplot(prior_vals.ravel(), label = 'Prior', fill = False, color = pal[i], ax = ax, cut = 0) ax.set_ylabel('') ax.set_yticklabels([]) ax.invert_xaxis() ax.tick_params(bottom = True, top = True, left = False, right = False, direction = 'in', labelsize = fs) if i == 0: ax.legend(frameon = False, loc = 'upper left') fig.suptitle('Section: ' + section, fontsize = fs + 2) fig.supxlabel('Age (Ma)', fontsize = fs) fig.tight_layout() return fig
[docs] def sadler_plot(full_trace, sample_df, ages_df, method = 'density', duration_bins = 50, rate_bins = 50, scale = 'log', include_age_constraints = True, density_cmap = 'jet', section_cmap = 'Spectral', figsize = (6, 4), **kwargs): """ Plot accumulation rate against duration. .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import sadler_plot full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' example_ages_path = 'examples/example_ages_df' sample_df = load_object(example_sample_path) ages_df = load_object(example_ages_path) sadler_plot(full_trace, sample_df, ages_df) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. sample_df: pandas.DataFrame :class:`pandas.DataFrame` containing proxy data for all sections. ages_df: pandas.DataFrame :class:`pandas.DataFrame` containing age constraints for all sections. method: str, optional Plot as a 2D histogram ('density') or as a scatter plot ('scatter'). The density plot will combine data for all sections in ``sections``, while a scatter plot will assign a unique color to each section. Defaults to 'density'. duration_bins: int, optional Number of bin edges to use for the duration data. Defaults to 50. rate_bins: int, optional Number of bin edges to use for the rate data. Defaults to 50. scale: str, optional Scaling for x- and y-axes ('linear' or 'log'). Defaults to 'log'. sections: list(str) numpy.array(str), optional List of target sections. Defaults to all sections in ``sample_df``. include_age_constraints: bool, optional Include age constraints in sedimentation rate calculations. Defaults to ``False``. density_cmap: str, optional Name of matplotlib colormap to use for probability density if ``method`` is 'density'. Defaults to 'jet'. section_cmap: str, optional Name of seaborn color palette to use for sections if ``method`` is 'scatter'. Defaults to 'Spectral'. Returns ------- fig: matplotlib.pyplot.figure Figure with sediment accumulation rate plotted against duration. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'sections' in kwargs: sections = list(kwargs['sections']) else: # any sections that have no relevant proxy observations would not have ages tracked in the trace sections = np.unique(sample_df.dropna(subset = proxies, how = 'all')['section']) sample_df, ages_df = clean_data(sample_df, ages_df, proxies, sections) rate_df = accumulation_rate(full_trace, sample_df, ages_df, method = 'all', sections = sections, include_age_constraints = include_age_constraints) dur = rate_df['duration'].values rate = rate_df['rate'].values fig = plt.figure(figsize = figsize) ax = fig.gca() if method == 'density': if scale == 'log': x_bins = np.logspace(np.log10(dur.min()), np.log10(dur.max()), duration_bins) y_bins = np.logspace(np.log10(rate.min()), np.log10(rate.max()), rate_bins) else: x_bins = np.linspace(dur.min(), dur.max(), duration_bins) y_bins = np.linspace(rate.min(),rate.max(), rate_bins) H, xedges, yedges = np.histogram2d(dur, rate, bins=[x_bins, y_bins], density = False) # don't normalize during this step - calculation not correct if scale = log H[H == 0] = np.nan H = H/len(dur) # normalize m = ax.pcolormesh(xedges, yedges, H.T, cmap = density_cmap) cb = plt.colorbar(m) cb.set_label(label='Probability density', fontsize = fs) elif method == 'scatter': pal = sns.color_palette(section_cmap, n_colors = len(sections)) for i in np.arange(len(sections)): section = sections[i] ax.scatter(rate_df[rate_df['section']==section]['duration'].values, rate_df[rate_df['section']==section]['rate'].values, s = 5, edgecolor = pal[i], facecolor = 'none', alpha = 0.5, label = section) ax.legend() if scale == 'log': ax.set_xscale('log') ax.set_yscale('log') ax.set_xlabel('LOG$_{10}$ (Duration [yr])', fontsize = fs) ax.set_ylabel('LOG$_{10}$ (Accumulation rate [mm/yr])', fontsize = fs) else: ax.set_xlabel('Duration (yr)', fontsize = fs) ax.set_ylabel('Accumulation rate (mm/yr)', fontsize = fs) ax.tick_params(labelsize = fs) fig.tight_layout() return fig
[docs] def accumulation_rate_stratigraphy(full_trace, sample_df, ages_df, age_bins = 50, age_bin_edges = [], rate_bins = 50, rate_bin_edges = [], rate_scale = 'log', include_age_constraints = True, cmap = 'jet', figsize = (8, 4), **kwargs): """ Plot the probability density of sediment accumulation rates (calculated between successive samples) through time. Probability density for each age bin sums to 1. Unless a ``sections`` argument is passed, includes accumulation rates for all sections. .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import accumulation_rate_stratigraphy full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' example_ages_path = 'examples/example_ages_df' sample_df = load_object(example_sample_path) ages_df = load_object(example_ages_path) accumulation_rate_stratigraphy(full_trace, sample_df, ages_df, sections = ['1']) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. sample_df: pandas.DataFrame :class:`pandas.DataFrame` containing proxy data for all sections. ages_df: pandas.DataFrame :class:`pandas.DataFrame` containing age constraints for all sections. age_bins: int, optional Number of bin edges for age data (if ``age_bin_edges`` not provided). Defaults to 50. rate_bins: int, optional Number of bin edges for rate data (if ``rate_bin_edges`` not provided). Defaults to 50. age_bin_edges: list(float) or numpy.array(float), optional List or array of bin edges for the age data. rate_bin_edges: list(float) numpy.array(float), optional List or array of bin edges for the rate data. rate_scale: str, optional Scaling for rate ('linear' or 'log'). Defaults to 'log'. include_age_constraints: bool, optional Include age constraints in sedimentation rate calculations. Defaults to ``True``. sections: list(str) or numpy.array(str), optional Section(s) to include. If not provided, combines data from all sections in ``sample_df``. Returns ------- fig: matplotlib.pyplot.figure Figure with accumulation rate probability density through time. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'sections' in kwargs: sections = list(kwargs['sections']) else: sections = np.unique(sample_df.dropna(subset = proxies, how = 'all')['section']) sample_df, ages_df = clean_data(sample_df, ages_df, proxies, sections) rate_df = accumulation_rate(full_trace, sample_df, ages_df, sections = sections, method = 'successive', include_age_constraints = include_age_constraints) if len(age_bin_edges) == 0: age_bin_edges = np.linspace(np.min(rate_df['top_age']), np.max(rate_df['base_age']), age_bins) if len(rate_bin_edges) == 0: if rate_scale == 'log': rate_bin_edges = np.logspace(np.log10(rate_df['rate'].min()), np.log10(rate_df['rate'].max()), rate_bins) elif rate_scale == 'linear': rate_bin_edges = np.linspace(rate_df['rate'].min(), rate_df['rate'].max(), rate_bins) # get age bin centers age_centers = (age_bin_edges[:-1] + age_bin_edges[1:]) / 2 age_vec = [] rate_vec = [] for rate, base, top in zip(rate_df['rate'], rate_df['base_age'], rate_df['top_age']): for i in np.arange(len(age_bin_edges)-1): center = age_centers[i] current_bin = pd.Interval(age_bin_edges[i], age_bin_edges[i+1], closed = 'left') current_sample_pair = pd.Interval(top, base, closed = 'right') result = current_bin.overlaps(current_sample_pair) if result: age_vec.append(center) rate_vec.append(rate) fig = plt.figure(figsize = figsize) ax = fig.gca() H, xedges, yedges = np.histogram2d(age_vec, rate_vec, bins=[age_bin_edges, rate_bin_edges], density = False) H[H == 0] = np.nan H = H/np.nansum(H, axis=1,keepdims=1) # each column (age bin) should sum to 1 m = ax.pcolormesh(xedges, yedges, H.T, cmap = cmap) if rate_scale == 'log': ax.set_yscale('log') ax.set_ylabel('LOG$_{10}$ (Accumulation rate [m/Myr])', fontsize = fs) elif rate_scale == 'linear': ax.set_ylabel('Accumulation rate (m/Myr)', fontsize = fs) ax.invert_xaxis() ax.set_xlabel('Age (Ma)', fontsize = fs) cb = plt.colorbar(m) cb.set_label(label='Probability density', fontsize = fs) return fig
[docs] def section_age_range(full_trace, sample_df, ages_df, lower_age, upper_age, legend = True, section_cmap = 'Spectral', **kwargs): """ Plot the stratigraphic interval corresponding to a given age range (based on the posterior section age models). If ``sections`` is not provided, includes each section that overlaps the target age range. .. plot:: from stratmc.data import load_object, load_trace from stratmc.plotting import section_age_range full_trace = load_trace('examples/example_docs_trace') example_sample_path = 'examples/example_sample_df' example_ages_path = 'examples/example_ages_df' sample_df = load_object(example_sample_path) ages_df = load_object(example_ages_path) section_age_range(full_trace, sample_df, ages_df, 120, 130) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. sample_df: pandas.DataFrame :class:`pandas.DataFrame` containing proxy data for all sections. ages_df: pandas.DataFrame :class:`pandas.DataFrame` containing age constraints for all sections. lower_age: float Lower bound (youngest) of the target age interval. upper_age: float Upper bound (oldest) of the target age interval. legend: bool, optional Generate a legend. Defaults to ``True``. cmap: str, optional Name of seaborn color palette to use for sections. Defaults to 'Spectral'. Returns ------- fig: matplotlib.pyplot.figure Tracer stratigraphy for each section, with the stratigraphic interval corresponding to the input age range highlighted. """ # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'proxy' in kwargs: proxy = kwargs['proxy'] else: proxy = proxies[0] if 'sections' in kwargs: sections = list(kwargs['sections']) else: sections = np.unique(sample_df.dropna(subset = proxies, how = 'all')['section']) if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 sample_df, ages_df = clean_data(sample_df, ages_df, proxies, sections) stats_df = age_range_to_height(full_trace, sample_df, ages_df,lower_age, upper_age, **kwargs) sections = stats_df.index.tolist() cs = {} pal = sns.color_palette(section_cmap, n_colors=len(sections)) cols = 4 N = len(sections) rows = int(math.ceil(N/cols)) gs = gridspec.GridSpec(rows, cols) fig = plt.figure(figsize = (10, 4.5*rows)) for n in range(N): section = sections[n] cs[section] = pal[n] ax = fig.add_subplot(gs[n]) section_df = sample_df[sample_df['section']==section] ax.scatter(section_df[proxy], section_df['height'], color = cs[section], edgecolor = 'k', zorder = 3) # 0 is the top height, -1 is the bottom height ax.axhspan(stats_df.loc[section]['base_2.5'], stats_df.loc[section]['top_97.5'], zorder = 0, color = (0.95, 0.95, 0.95), label='95% envelope') ax.axhspan(stats_df.loc[section]['base_16'], stats_df.loc[section]['top_84'], zorder = 1, color = (0.8, 0.8, 0.8), label='68% envelope') ax.axhline(stats_df.loc[section]['base_mle'], color = 'darkgray', linestyle = 'solid', zorder = 2) ax.axhline(stats_df.loc[section]['top_mle'], color = 'darkgray', linestyle = 'solid', zorder = 2, label = 'Most likely range') ax.set_ylabel('Height (m)', fontsize = fs) if proxy == 'd13c': ax.set_xlabel(r'$\delta^{13}$C$_{\mathrm{carb}} (‰)$', fontsize = fs) else: ax.set_xlabel(proxy, fontsize = fs) ax.set_title(section, fontsize = fs) ax.tick_params(labelsize=fs) ax.locator_params(axis='x', nbins=5) if legend: if n == 0: ax.legend(loc = 'upper left') fig.tight_layout() plt.subplots_adjust(wspace=0.4) return fig
[docs] def proxy_data_gaps(full_trace, time_grid = None, yaxis = 'percentage', figsize = (6, 3.5), **kwargs): """ For a set of discrete time bins, shows the number of draws from the posterior where there are no proxy observations (i.e., where there are temporal `gaps` in the proxy data). This plot can be used to determine where additional observations are needed to improve the inference. .. plot:: from stratmc.data import load_trace from stratmc.plotting import proxy_data_gaps full_trace = load_trace('examples/example_docs_trace') proxy_data_gaps(full_trace, time_grid = full_trace.X_new.X_new.values[::2]) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. time_grid: np.array, optional Time bin edges; if not provided, defaults to the ``ages`` array passed to :py:meth:`get_trace() <stratmc.inference.get_trace>`. yaxis: str, optional Set y-axis to percentage of posterior draws without observations ('percentage') or to the number of posterior draws without observations ('count'). Defaults to 'percentage'. Returns ------- fig: matplotlib.pyplot.figure Figure with bar plot of number of posterior draws with gaps for each time bin. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 if time_grid is None: time_grid = full_trace.X_new.X_new.values age_gaps, grid_centers, grid_widths, n = find_gaps(full_trace, time_grid = time_grid) fig = plt.figure(figsize = figsize) ax = fig.gca() if yaxis == 'percentage': ax.bar(grid_centers, height = 100 * age_gaps/n, width = grid_widths, color = 'lightgray', edgecolor = 'k', lw = 0.5) ax.set_ylabel('% solutions without observations', fontsize = fs) elif yaxis == 'count': ax.bar(grid_centers, height = age_gaps, width = grid_widths, color = 'lightgray', edgecolor = 'k', lw = 0.5) ax.set_ylabel('# solutions without observations', fontsize = fs) ax.set_xlabel('Age (Ma)', fontsize = fs) ax.tick_params(labelsize = fs) ax.invert_xaxis() fig.tight_layout() return fig
[docs] def proxy_data_density(full_trace, time_grid = None, figsize = (6, 3.5), **kwargs): """ Plot the mean number proxy observations in discrete time bins (averaged over all posterior draws). This plot can be used to determine where proxy observations are relatively sparse, and additional observations may improve the inference. .. plot:: from stratmc.data import load_trace from stratmc.plotting import proxy_data_density full_trace = load_trace('examples/example_docs_trace') proxy_data_density(full_trace, time_grid = full_trace.X_new.X_new.values[::2]) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. time_grid: np.array, optional Time bin edges; if not provided, defaults to the ``ages`` array passed to :py:meth:`get_trace() <stratmc.inference.get_trace>`. Returns ------- fig: matplotlib.pyplot.figure Figure with bar plot of mean number of observations in each time bin. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 if time_grid is None: time_grid = full_trace.X_new.X_new.values sample_counts, grid_edges, n = count_samples(full_trace, time_grid = time_grid) grid_widths = np.diff(grid_edges) fig = plt.figure(figsize = figsize) ax = fig.gca() ax.bar(grid_edges[:-1], height = sample_counts/n, width = grid_widths, align = 'edge', color = 'lightgray', edgecolor = 'k', lw = 0.5) ax.set_ylabel('Average # observations', fontsize = fs) ax.set_xlabel('Age (Ma)', fontsize = fs) ax.tick_params(labelsize = fs) ax.invert_xaxis() fig.tight_layout() return fig
[docs] def lengthscale_traceplot(full_trace, chains = None, legend = True, figsize = (5, 3.5), **kwargs): """ Generate trace plot (parameter value vs. step in Markov chain) for the :class:`pymc.gp.cov.ExpQuad <pymc.gp.cov.ExpQuad>` covariance kernel lengthscale. By default, includes all chains; to plot the draws for only a subset of chains, past list of chain indices to ``chains``. Use to check for posterior multimodality. .. plot:: from stratmc.data import load_trace from stratmc.plotting import lengthscale_traceplot full_trace = load_trace('examples/example_docs_trace') lengthscale_traceplot(full_trace, chains = [0, 1, 2, 3]) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. chains: list(int) or numpy.array(int); optional Indices of chains to plot; optional (defaults to all chains). proxy: str, optional Target proxy; only required if more than one proxy was included in the inference. legend: bool, optional Generate a legend. Defaults to ``True``. Returns ------- fig: matplotlib.pyplot.figure Figure with lengthscale trace plot. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'proxy' in kwargs: proxy = kwargs['proxy'] else: proxy = proxies[0] fig = plt.figure(figsize = figsize) ax = fig.gca() if chains is None: post_ls = full_trace.posterior['gp_ls_' +str(proxy)].values chains = np.arange(post_ls.shape[0]) else: post_ls = full_trace.sel(chain = chains).posterior['gp_ls_' +str(proxy)].values sns.reset_defaults() for i in np.arange(post_ls.shape[0]): ax.plot(np.arange(post_ls.shape[1]), post_ls[i, :, :], lw = 0.5, label = 'Chain ' + str(chains[i])) ax.set_xlabel('Draw', fontsize = fs) ax.set_ylabel('Lengthscale (%s)' % proxy, fontsize = fs) if legend: ax.legend() ax.tick_params(labelsize = fs) fig.tight_layout() return fig
[docs] def lengthscale_stability(full_trace, figsize = (5, 3.5), **kwargs): """ Plot the posterior standard deviation of the :class:`pymc.gp.cov.ExpQuad <pymc.gp.cov.ExpQuad>` covariance kernel lengthscale when 1 through *N* chains are considered. When the posterior has been sufficiently explored, the standard deviation will stabilize; if it has not stabilized, then additional chains should be run. To consider chains from multiple traces associated with the same inference model, first combine the traces (saved as NetCDF files) using :py:meth:`combine_traces() <stratmc.data>` in :py:mod:`stratmc.data`. .. plot:: from stratmc.data import load_trace from stratmc.plotting import lengthscale_stability full_trace = load_trace('examples/example_docs_trace') lengthscale_stability(full_trace, fontsize = 10) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. proxy: str, optional Target proxy; only required if more than one proxy was included in the inference. Returns ------- fig: matplotlib.pyplot.figure Figure showing the standard deviation of the covariance kernel lengthscale hyperparameter posterior for 1 through *N* chains. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'proxy' in kwargs: proxy = kwargs['proxy'] else: proxy = proxies[0] gp_ls_variance = calculate_lengthscale_stability(full_trace, proxy = proxy) fig = plt.figure(figsize = figsize) ax = fig.gca() ax.plot(np.arange(len(gp_ls_variance)) + 1, gp_ls_variance, color = 'k') ax.set_xlabel('Number of chains', fontsize = fs) ax.set_ylabel('Lengthscale standard deviation (%s)' % proxy, fontsize = fs) ax.tick_params(labelsize = fs) fig.tight_layout() return fig
[docs] def proxy_signal_stability(full_trace, figsize = (5, 3.5), **kwargs): """ Plot the sum (over all time slices) of the residuals between the median inferred proxy signal when all *N* chains are considered compared to when 1 to *N-1* chains are considered. When the posterior has been sufficiently explored, the residuals will stabilize and approach zero; if they have not stabilized, then additional chains should be run. To consider chains from multiple traces associated with the same inference model, first combine the traces (saved as NetCDF files) using :py:meth:`combine_traces() <stratmc.data>` in :py:mod:`stratmc.data`. .. plot:: from stratmc.data import load_trace from stratmc.plotting import proxy_signal_stability full_trace = load_trace('examples/example_docs_trace') proxy_signal_stability(full_trace) plt.show() Parameters ---------- full_trace: arviz.InferenceData An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`. proxy: str, optional Target proxy; only required if more than one proxy was included in the inference. Returns ------- fig: matplotlib.pyplot.figure Figure showing the stability of the proxy signal inference. """ if 'fontsize' in kwargs: fs = kwargs['fontsize'] else: fs = 12 # get list of proxies included in model from full_trace variables = [ l for l in list(full_trace["posterior"].data_vars.keys()) if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l) ] proxies = [] for var in variables: proxies.append(var[6:]) if 'proxy' in kwargs: proxy = kwargs['proxy'] else: proxy = proxies[0] median_residuals = calculate_proxy_signal_stability(full_trace, proxy = proxy) fig = plt.figure(figsize = figsize) ax = fig.gca() ax.plot(np.arange(median_residuals.shape[0]) + 1, np.sum(median_residuals, axis = 1), color = 'k') ax.set_xlabel('Number of chains', fontsize = fs) ax.set_ylabel('Sum of median ' + str(proxy) + ' residuals', fontsize = fs) ax.tick_params(labelsize = fs) fig.tight_layout() return fig