Source code for stratmc.plotting

import math
import sys

import arviz as az
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import gridspec
from matplotlib import pyplot as plt
from scipy.ndimage import gaussian_filter as gaussian
from scipy.stats import gaussian_kde

from stratmc.data import accumulation_rate, clean_data
from stratmc.inference import (
    age_range_to_height,
    calculate_lengthscale_stability,
    calculate_proxy_signal_stability,
    count_samples,
    find_gaps,
    map_ages_to_section,
)

pd.options.mode.chained_assignment = None


[docs]
def proxy_strat(sample_df, ages_df, proxy = 'd13c', plot_constraints = True, plot_excluded_samples = False, cmap = 'Spectral', legend = False, **kwargs):
    """

    Plot stratigraphic data (proxy observations and age constraints) for each section.

    .. plot::

       from stratmc.data import load_object
       from stratmc.plotting import proxy_strat

       example_sample_path = 'examples/example_sample_df'
       example_ages_path = 'examples/example_ages_df'
       sample_df = load_object(example_sample_path)
       ages_df = load_object(example_ages_path)

       proxy_strat(sample_df, ages_df, proxy = 'd13c')

       plt.show()

    Parameters
    ----------

    sample_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing proxy data for all sections.

    ages_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing age constraints for all sections.

    proxy: str, optional
        Name of proxy. Defaults to 'd13c'.

    sections: list(str) or numpy.array(str), optional
        List of sections to plot. Defaults to all sections in ``sample_df``.

    plot_constraints: bool, optional
        Whether to plot age constraints as dashed lines. Ages are printed above dashed lines by defalut; to turn off age labels, pass ``print_ages = False``.

    plot_excluded_samples: bool, optional
        Whether to plot proxy observations for excluded samples (``Exclude?`` is ``True`` in ``sample_df``) as red dots. Defaults to ``False``.

    cmap: str, optional
        Name of seaborn color palette to use for sections. Defaults to 'Spectral'.

    legend: bool, optional
        Generate a legend. Defaults to ``False``.

    print_ages: bool, optional
        Print ages as text on figure.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with observations for each section.

    """

    if 'sections' in kwargs:
        sections = list(kwargs['sections'])
    else:
        sections = np.unique(sample_df[~np.isnan(sample_df[proxy])]['section'])

    if 'print_ages' in kwargs:
        print_ages = kwargs['print_ages']
    else:
        print_ages = True

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    cs = {}
    pal = sns.color_palette(cmap,n_colors=len(sections))

    cols = 4
    N = len(sections)
    rows = int(math.ceil(N/cols))

    gs = gridspec.GridSpec(rows, cols)
    fig = plt.figure(figsize = (10, 4.5*rows))

    for n in range(N):
        section = sections[n]
        cs[section] = pal[n]
        ax = fig.add_subplot(gs[n])

        if len(sample_df[proxy][(sample_df['section'] == section) & (sample_df['Exclude?'] == True)]) > 0:
            if plot_excluded_samples:
                ax.scatter(sample_df[proxy][(sample_df['section'] == section) & (sample_df['Exclude?'] == True)],
                        sample_df['height'][(sample_df['section'] == section) & (sample_df['Exclude?'] == True)],
                        color = 'red',
                        edgecolors = 'k',
                        label = 'Excluded sample')


        ax.scatter(sample_df[proxy][(sample_df['section'] == section) & (sample_df['Exclude?'] != True)],
                   sample_df['height'][(sample_df['section'] == section) & (sample_df['Exclude?'] != True)],
                   color = cs[section],
                   edgecolors = 'k',
                   label = 'Sample')


        xl = ax.get_xlim()
        yl = ax.get_ylim()

        if plot_constraints:
            age_heights = ages_df['height'][(ages_df['section']==section) & (~ages_df['Exclude?']) & (~ages_df['intermediate detrital?'])  & (~ages_df['intermediate intrusive?']) & (~ages_df['depositional?'])]
            for h in age_heights:
                ax.axhline(h,
                      color = cs[section],
                      linestyle = 'dashed',
                      label = 'Depositional age',
                      zorder = 2)

            detrital_age_heights = ages_df['height'][(ages_df['section']==section) & (ages_df['Exclude?'] == False) & (ages_df['intermediate detrital?'] == True)]

            for h in detrital_age_heights:
                ax.axhline(h,
                        color = cs[section],
                        linestyle = 'dotted',
                        label = 'Detrital age',
                        zorder = 2)

            intrusive_age_heights = ages_df['height'][(ages_df['section']==section) & (ages_df['Exclude?'] == False) & (ages_df['intermediate intrusive?'] == True)]

            for h in intrusive_age_heights:
                ax.axhline(h,
                      color = cs[section],
                      linestyle = 'dashdot',
                      label = 'Intrusive age',
                      zorder = 2)

            if legend:
                ax.legend()


        x_range = xl[1] - xl[0]
        y_range = yl[1] - yl[0]

        if print_ages:
            for age, height, sigma, dist_type, param1, param2 in zip(ages_df['age'][(ages_df['section']==section) & (~ages_df['Exclude?']) & (~ages_df['depositional?'])],
                                          ages_df['height'][(ages_df['section']==section) & (ages_df['Exclude?'] != True) & (~ages_df['depositional?'])],
                                          ages_df['age_std'][(ages_df['section']==section) & (ages_df['Exclude?'] != True) & (~ages_df['depositional?'])],
                                          ages_df['distribution_type'][(ages_df['section']==section) & (ages_df['Exclude?'] != True) & (~ages_df['depositional?'])],
                                          ages_df['param_1'][(ages_df['section']==section) & (ages_df['Exclude?'] != True) & (~ages_df['depositional?'])],
                                          ages_df['param_2'][(ages_df['section']==section) & (ages_df['Exclude?'] != True) & (~ages_df['depositional?'])]):

                if dist_type == 'Normal':
                    ax.text(xl[0] + x_range * 0.05, # 0.45
                            height + y_range * 0.01,
                            s = f'{age:.1f}' + r'$\pm$' + f'{2*sigma:.1f} Ma',
                            fontsize = 10)

                elif dist_type == 'Uniform':
                    ax.text(xl[0] + x_range * 0.05, # 0.45
                            height + y_range * 0.01,
                            s = f'{param1:.1f} - {param2:.1f} Ma',
                            fontsize = 10)


        ax.set_xlim(xl)

        ax.set_ylabel('Height (m)', fontsize = fs)

        if proxy == 'd13c':
            ax.set_xlabel(r'$\delta^{13}$C$_{\mathrm{carb}} (‰)$', fontsize = fs)
        else:
            ax.set_xlabel(proxy, fontsize = fs)

        ax.set_title(section, fontsize = fs)
        ax.tick_params(bottom = True, top = False, left = True, right = False, direction = 'out', labelsize = fs, width = 1.5)
        ax.locator_params(axis='x', nbins = 5)

    fig.tight_layout()

    plt.subplots_adjust(wspace=0.45)

    return fig



[docs]
def proxy_inference(sample_df, ages_df, full_trace, legend = True, plot_constraints = False, plot_samples = False, plot_excluded_samples = False, plot_mean = False, plot_mle = False, orientation = 'horizontal', marker_size = 20, section_legend = False, section_cmap = 'Spectral', **kwargs):

    """
    Plot the inferred proxy signal over time.

    .. plot::

       from stratmc.data import load_object, load_trace
       from stratmc.plotting import proxy_inference

       full_trace = load_trace('examples/example_docs_trace')
       example_sample_path = 'examples/example_sample_df'
       example_ages_path = 'examples/example_ages_df'
       sample_df = load_object(example_sample_path)
       ages_df = load_object(example_ages_path)

       proxy_inference(sample_df, ages_df, full_trace, proxy = 'd13c')

       plt.show()


    Parameters
    ----------

    sample_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing proxy data for all sections.

    ages_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing age constraints for all sections.

    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    proxy: str, optional
        Tracer to plot; only required if more than one proxy was included in the inference.

    legend: bool, optional
        Generate a legend. Defaults to ``True``.

    plot_constraints: bool, optional
        Plot age constraints for each section as dashed lines. Defaults to ``False``.

    plot_samples: bool, optional
        Plot proxy observations by most likely posterior age. Defaults to ``False``.

    plot_excluded_samples: bool, optional
        Plot proxy observations that were excluded from the inference (``Exclude?`` is ``True`` in ``sample_df``). Defaults to ``False``.

    plot_mean: bool, optional
        Plot the mean as a dashed line. Defaults to ``False``.

    plot_mle: bool, optional
        Plot the maximum likelihood estimate. Defaults to ``False``.

    orientation: str, optional
        Orientation of figure ('horizontal' with age on the x-axis, or 'vertical' with age on the y-axis). Defaults to 'horizontal'.

    marker_size: int, optional
        Size of markers if ``plot_samples`` is ``True``. Defaults to 20.

    section_legend: bool, optional
        Include section names in the legend (if ``plot_samples`` is ``True``). Defaults to ``False``.

    section_cmap: str, optional
        Name of seaborn color palette to use for sections (if ``plot_samples`` is ``True``). Defaults to 'Spectral'.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with the proxy signal inference.

    """

    if orientation == 'horizontal':
        horizontal = True
        vertical = False
    if orientation == 'vertical':
        horizontal = False
        vertical = True

    if 'figsize' in kwargs:
        figsize = kwargs['figsize']
    elif horizontal:
        figsize = (9, 5)
    elif vertical:
        figsize = (5, 9)

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'proxy' in kwargs:
        proxy = kwargs['proxy']
    else:
        proxy = proxies[0]

    if 'sections' in kwargs:
        sections = list(kwargs['sections'])
    else:
        sections = np.unique(sample_df[~np.isnan(sample_df[proxy])]['section'])

    sample_df, ages_df = clean_data(sample_df, ages_df, proxies, sections)

    ages = full_trace.X_new.X_new.values

    fig=plt.figure(figsize=figsize)

    ax = fig.gca()

    cs = {}
    pal = sns.color_palette(section_cmap,n_colors=len(sections))
    for i in range(len(sections)):
        section = sections[i]
        cs[section] = pal[i]

    min_ages = []
    max_ages = []

    if plot_samples: # by max likelihood
        plotted_excluded = False
        plotted_data = False

        for section in sections:
            section_df = sample_df[sample_df['section']==section]
            proxy_idx = (~np.isnan(section_df[proxy]))
            excluded_idx = (~np.isnan(section_df[proxy])) & (section_df['Exclude?'])

            sec_ages = az.extract(full_trace.posterior)[str(section)+'_ages'].values

            max_like = np.zeros(sec_ages.shape[0])
            for i in np.arange(sec_ages.shape[0]):
                sample_ages = sec_ages[i,:]
                dx = np.linspace(np.min(sample_ages), np.max(sample_ages), 1000)
                max_like[i] = dx[np.argmax(gaussian_kde(sample_ages, bw_method = 1)(dx))]

            if section_legend:
                label = section
            elif not plotted_data:
                label = 'Sample (most likely age)'
            else:
                label = '_nolegend_'

            if vertical:
                ax.scatter(section_df[proxy][proxy_idx & ~excluded_idx],
                           max_like[proxy_idx & ~excluded_idx],
                           #np.mean(sec_ages, axis = 1),
                           s = marker_size,
                           color = cs[section],
                           label = label,
                           edgecolors = 'k',
                           lw = 0.5,
                           zorder = 5)

                plotted_data = True

                if (plot_excluded_samples) and (len(section_df[proxy][excluded_idx]) > 0):
                    if not plotted_excluded:
                        excluded_label = 'Excluded samples'
                    else:
                        excluded_label = '_nolegend'

                    ax.scatter(section_df[proxy][excluded_idx & proxy_idx],
                           max_like[excluded_idx & proxy_idx],
                           s = marker_size,
                           color = 'none',
                           label = excluded_label,
                           edgecolors = cs[section],
                           lw = 0.5,
                           zorder = 5)

                    plotted_excluded = True

            if horizontal:
                ax.scatter(max_like[proxy_idx & ~excluded_idx],
                           section_df[proxy][proxy_idx & ~excluded_idx],
                           s = marker_size,
                           color = cs[section],
                           label = label,
                           lw = 0.5,
                           edgecolors = 'k',
                           zorder = 5)

                plotted_data = True

                if (plot_excluded_samples) and (len(section_df[proxy][proxy_idx & excluded_idx]) > 0):
                    if not plotted_excluded:
                        excluded_label = 'Excluded samples'
                    else:
                        excluded_label = '_nolegend'

                    ax.scatter(max_like[excluded_idx & proxy_idx],
                           section_df[proxy][excluded_idx & proxy_idx],
                           s = marker_size,
                           color = 'none',
                           label = excluded_label,
                           edgecolors = cs[section],
                           lw = 0.5,
                           zorder = 5)

                    plotted_excluded = True

        min_ages = np.append(min_ages, np.min(np.mean(sec_ages,axis = 1)))

        max_ages = np.append(max_ages, np.max(np.mean(sec_ages, axis = 1)))

    proxy_pred = az.extract(full_trace.posterior_predictive)['f_pred_' + proxy].values

    lo = np.percentile(proxy_pred, 2.5, axis=1).flatten()
    hi = np.percentile(proxy_pred, 97.5, axis=1).flatten()

    if vertical:
        ax.fill_betweenx(ages.ravel(),
                         hi,
                         lo,
                         color=(.95,.95,.95),
                         label='95% envelope',
                         linestyle = 'dotted',
                         lw = 1.5,
                         edgecolor = 'k',
                         zorder = 0)

    if horizontal:
        ax.fill_between(ages.ravel(),
                        hi,
                        lo,
                        color=(.95,.95,.95),
                        label='95% envelope',
                        linestyle = 'dotted',
                        edgecolor = 'k',
                        lw = 1.5,
                        zorder = 0)

    lo = np.percentile(proxy_pred, 17, axis=1).flatten()
    hi = np.percentile(proxy_pred, 100-17, axis=1).flatten()

    if vertical:
        ax.fill_betweenx(ages.ravel(),
                         hi,
                         lo,
                         color=(.8,.8,.8),
                         linestyle = 'dashed',
                         label='66% envelope',
                         edgecolor = 'k',
                         lw = 1.5,
                         zorder = 1)
    if horizontal:
        ax.fill_between(ages.ravel(),
                        hi,
                        lo,
                        color=(.8,.8,.8),
                        linestyle = 'dashed',
                        label='66% envelope',
                        edgecolor = 'k',
                        lw = 1.5,
                        zorder = 1)

    lo = np.percentile(proxy_pred, 33.5, axis=1).flatten()
    hi = np.percentile(proxy_pred, 100-33.5, axis=1).flatten()

    if vertical:
        ax.fill_betweenx(ages.ravel(),
                         hi,
                         lo,
                         color=(.65,.65,.65),
                         label='33% envelope',
                         edgecolor = 'k',
                         lw = 1.5,
                         zorder = 2)
    if horizontal:
        ax.fill_between(ages.ravel(),
                        hi,
                        lo,
                        color=(.65,.65,.65),
                        label='33% envelope',
                        edgecolor = 'k',
                        lw = 1.5,
                        zorder = 2)

    if plot_mean:
        if horizontal:
            ax.plot(ages.ravel(),
                    np.mean(proxy_pred,axis=1).flatten(),
                    color = 'k',
                    linestyle = 'solid',
                    lw = 2,
                    zorder = 3,
                   label = 'Mean ' + str(proxy))

        if vertical:
            ax.plot(np.mean(proxy_pred, axis=1).flatten(),
                    ages.ravel(),
                    color = 'k',
                    linestyle = 'solid',
                    lw = 2,
                    zorder = 3,
                   label = 'Mean ' + str(proxy))

    if plot_mle:
        dy = np.linspace(np.min(proxy_pred), np.max(proxy_pred), 200)
        max_like = np.zeros(ages.size)
        for i in np.arange(ages.size):
            time_slice = proxy_pred[i,:]
            max_like[i] = dy[np.argmax(gaussian_kde(time_slice, bw_method = 1)(dy))]
        max_like = gaussian(max_like, 2)

        if horizontal:
            ax.plot(ages.ravel(),
                    max_like,
                    zorder = 4,
                    color = 'k',
                    linestyle = 'solid',
                    lw = 2,
                    label = 'Most likely ' + str(proxy))

        if vertical:
            ax.plot(max_like,
                    ages.ravel(),
                    zorder = 4,
                    color = 'k',
                    linestyle = 'solid',
                    lw = 2,
                    label = 'Most likely ' + str(proxy))

    xl = ax.get_xlim()
    yl = ax.get_ylim()

    if plot_constraints:
        i = 0
        for section in sections:
            if i == 0:
                label = 'Age constraint'
            else:
                label =  '_nolegend_'
            if vertical:
                ax.hlines(ages_df['age'][ages_df['section']==section],
                          xl[0],
                          xl[1],
                          color = cs[section],
                          linestyle = 'dashed',
                          label = label,
                          zorder = -1)
                ax.set_xlim(xl)

            if horizontal:
                ax.vlines(ages_df['age'][ages_df['section']==section],
                          yl[0],
                          yl[1],
                          color = cs[section],
                          linestyle = 'dashed',
                          label = label,
                          zorder = -1)
                ax.set_ylim(yl)
            i += 1

    min_ages = np.append(min_ages, np.min(ages))
    max_ages = np.append(max_ages, np.max(ages))

    if vertical:
        ax.set_ylabel('Age (Ma)', fontsize = fs)
        if proxy == 'd13c':
            ax.set_xlabel(r'$\delta^{13}$C$_{\mathrm{carb}}$ (‰)', fontsize = fs)
        else:
            ax.set_xlabel(proxy, fontsize = fs)

        ax.set_ylim([np.min(min_ages) - 1, np.max(max_ages) + 1])
        ax.invert_yaxis()

    if horizontal:
        ax.set_xlabel('Age (Ma)', fontsize = fs)
        if proxy == 'd13c':
            ax.set_ylabel(r'$\delta^{13}$C$_{\mathrm{carb}}$ (‰)', fontsize = fs)
        else:
            ax.set_ylabel(proxy, fontsize = fs)

        ax.set_xlim([np.min(min_ages) - 1, np.max(max_ages) + 1])
        ax.invert_xaxis()

    ax.tick_params(bottom = True, top = True, left = True, right = True, direction = 'in', labelsize = fs, width = 1.5)

#     for axis in ['top','bottom','left','right']:
#         ax.spines[axis].set_linewidth(1.5)

    if legend:
        ax.legend(loc='best')

    return fig



[docs]
def interpolated_proxy_inference(interpolated_df, interpolated_proxy_df, proxy, legend = True, plot_samples = False, plot_mle = True, orientation = 'horizontal', section_legend = False, marker_size = 20, section_cmap = 'Spectral', **kwargs):
    """

    Plot interpolated proxy signal over time (by extending the posterior section age models to a new proxy not included in the inference model) using interpolated age models from :py:meth:`extend_age_model() <stratmc.inference.extend_age_model>` and interpolated proxy values from :py:meth:`interpolate_proxy() <stratmc.inference.interpolate_proxy>` in :py:mod:`stratmc.inference`.

    .. plot::

        from stratmc.data import load_object, load_trace
        from stratmc.inference import extend_age_model, interpolate_proxy
        from stratmc.plotting import interpolated_proxy_inference

        full_trace = load_trace('examples/example_docs_trace')
        example_sample_path = 'examples/example_sample_df'
        example_sample_path_d18o = 'examples/example_sample_df_d18o'
        example_ages_path = 'examples/example_ages_df'
        sample_df = load_object(example_sample_path)
        sample_df_d18o = load_object(example_sample_path_d18o)
        ages_df = load_object(example_ages_path)

        interpolated_df = extend_age_model(full_trace, sample_df, ages_df, ['d18o'], new_proxy_df = sample_df_d18o)
        ages_new = full_trace.X_new.X_new.values.ravel()
        interpolated_proxy_df = interpolate_proxy(interpolated_df, 'd18o', ages_new)

        interpolated_proxy_inference(interpolated_df, interpolated_proxy_df, 'd18o')

        plt.show()


    Parameters
    ----------

    interpolated_df: pandas.DataFrame
        :class:`pandas.DataFrame` with interpolated age draws and sample age summary statistics from :py:meth:`extend_age_model() <stratmc.inference.extend_age_model>` in :py:mod:`stratmc.inference`.

    interpolated_proxy_df: pandas.DataFrame
        :class:`pandas.DataFrame` with interpolated proxy values and summary statistics at target ages from :py:meth:`interpolate_proxy() <stratmc.inference.interpolate_proxy>` in :py:mod:`stratmc.inference`.

    proxy: str
        Name of new proxy (must match column name in ``interpolated_proxy_df``).

    legend: bool, optional
        Generate a legend. Defaults to ``True``.

    plot_samples: bool, optional
        Plot proxy observations by most likely posterior age. Defaults to ``False``.

    plot_mle: bool, optional
        Plot the maximum likelihood estimate. Defaults to ``True``.

    orientation: str, optional
        Orientation of figure ('horizontal' or 'vertical'). Defaults to 'horizontal'.

    marker_size: int, optional
        Size of markers if ``plot_samples`` is ``True``. Defaults to 20.

    section_legend: bool, optional
        Include section names in the legend (if ``plot_samples`` is ``True``). Defaults to ``False``.

    section_cmap: str, optional
        Name of seaborn color palette to use for sections (if ``plot_samples`` is ``True``). Defaults to 'Spectral'.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with interpolated proxy signal over time.

    """

    if 'sections' in kwargs:
        sections = list(kwargs['sections'])
    else:
        sections = np.unique(interpolated_df['section'])

    if orientation == 'horizontal':
        horizontal = True
        vertical = False
    if orientation == 'vertical':
        horizontal = False
        vertical = True

    if 'figsize' in kwargs:
        figsize = kwargs['figsize']
    elif horizontal:
        figsize = (9, 5)
    elif vertical:
        figsize = (5, 9)

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    fig=plt.figure(figsize=figsize)

    ax = fig.gca()

    cs = {}
    pal = sns.color_palette(section_cmap,n_colors=len(sections))
    for i in range(len(sections)):
        section = sections[i]
        cs[section] = pal[i]

    min_ages = []
    max_ages = []

    if plot_samples: # by max likelihood
        for section in sections:
            section_df = interpolated_df[interpolated_df['section']==section]

            if section_legend:
                label = section
            else:
                label = '_nolegend_'

            if vertical:
                ax.scatter(section_df[proxy],
                           section_df['mle'],
                           s = marker_size,
                           color = cs[section],
                           label = label,
                           edgecolors = 'k',
                           lw = 0.5,
                           zorder = 3)

            if horizontal:
                ax.scatter(section_df['mle'],
                           section_df[proxy],
                           s = marker_size,
                           color = cs[section],
                           label = label,
                           lw = 0.5,
                           edgecolors = 'k',
                           zorder = 3)

        sec_ages = section_df['mle'].values

        min_ages = np.append(min_ages, np.min(sec_ages))

        max_ages = np.append(max_ages, np.max(sec_ages))


    if vertical:
        ax.fill_betweenx(interpolated_proxy_df['age'],
                         interpolated_proxy_df['97.5'],
                         interpolated_proxy_df['2.5'],
                         color=(.95,.95,.95),
                         label='95% envelope',
                         linestyle = '--',
                         lw = 1.5,
                         edgecolor = 'k',
                         zorder = 0)

    if horizontal:
        ax.fill_between(interpolated_proxy_df['age'],
                        interpolated_proxy_df['97.5'],
                        interpolated_proxy_df['2.5'],
                        color=(.95,.95,.95),
                        label='95% envelope',
                        linestyle = '--',
                        edgecolor = 'k',
                        lw = 1.5,
                        zorder = 0)


    if vertical:
        ax.fill_betweenx(interpolated_proxy_df['age'],
                         interpolated_proxy_df['84'],
                         interpolated_proxy_df['16'],
                         color=(.8,.8,.8),
                         label='68% envelope',
                         edgecolor = 'k',
                         lw = 1.5,
                         zorder = 1)
    if horizontal:
        ax.fill_between(interpolated_proxy_df['age'],
                        interpolated_proxy_df['84'],
                        interpolated_proxy_df['16'],
                        color=(.8,.8,.8),
                        label='68% envelope',
                        edgecolor = 'k',
                        lw = 1.5,
                        zorder = 1)

    if plot_mle:
        if horizontal:
            ax.plot(interpolated_proxy_df['age'], interpolated_proxy_df['mle'], zorder = 2, color = 'k', linestyle = 'solid', lw = 2, label = 'Most likely ' + str(proxy))
        if vertical:
            ax.plot(interpolated_proxy_df['mle'], interpolated_proxy_df['age'], zorder = 2, color = 'k', linestyle = 'solid', lw = 2, label = 'Most likely ' + str(proxy))

    min_ages = np.append(min_ages, np.min(interpolated_proxy_df['age']))
    max_ages = np.append(max_ages, np.max(interpolated_proxy_df['age']))

    if vertical:
        ax.set_ylabel('Age (Ma)', fontsize = fs)
        ax.set_xlabel(proxy, fontsize = fs)

        ax.set_ylim([np.min(min_ages) - 1, np.max(max_ages) + 1])
        ax.invert_yaxis()

    if horizontal:
        ax.set_xlabel('Age (Ma)', fontsize = fs)
        ax.set_ylabel(proxy, fontsize = fs)

        ax.set_xlim([np.min(min_ages) - 1, np.max(max_ages) + 1])
        ax.invert_xaxis()

    ax.tick_params(bottom = True, top = True, left = True, right = True, direction = 'in', labelsize = fs, width = 1.5)

    if legend:
        ax.legend(loc='best')

    return fig




[docs]
def age_height_model(sample_df, ages_df, full_trace, include_excluded_samples = True, plot_samples = True, cmap = 'Spectral', legend = False, **kwargs):
    """
    Generate a posterior age-height plot for each section.

    .. plot::

       from stratmc.data import load_object, load_trace
       from stratmc.plotting import age_height_model

       full_trace = load_trace('examples/example_docs_trace')
       example_sample_path = 'examples/example_sample_df'
       example_ages_path = 'examples/example_ages_df'
       sample_df = load_object(example_sample_path)
       ages_df = load_object(example_ages_path)

       age_height_model(sample_df, ages_df, full_trace)

       plt.show()

    Parameters
    ----------

    sample_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing proxy data for all sections.

    ages_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing age constraints for all sections.

    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    sections: list(str) or numpy.array(str), optional
        List of sections to plot. Defaults to all sections in ``sample_df``.

    cmap: str, optional
        Name of seaborn color palette to use for sections. Defaults to 'Spectral'.

    legend: bool, optional
        Generate a legend. Defaults to ``False``.

    include_excluded_samples: bool, optional
        Whether to consider excluded samples (``Exclude?`` is ``True`` in ``sample_df``) whose ages were passively tracked in the inference model. Defaults to ``True``.

    plot_samples: bool, optional
        Plot proxy observations by most likely posterior age. Defaults to ``True``.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with age-height models for each section.
    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']

    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'sections' in kwargs:
        sections = list(kwargs['sections'])
    else:
        sections = np.unique(sample_df.dropna(subset = proxies, how = 'all')['section'])

    sample_df, ages_df = clean_data(sample_df, ages_df, proxies, sections)

    cs = {}
    pal = sns.color_palette(cmap, n_colors=len(sections))

    cols = 4
    N = len(sections)
    rows = int(math.ceil(N/cols))

    gs = gridspec.GridSpec(rows, cols)
    fig = plt.figure(figsize = (11, 2.5*rows))

    for n in range(N):
        section = sections[n]
        cs[section] = pal[n]
        ax = fig.add_subplot(gs[n])

        sec_ages = az.extract(full_trace.posterior)[str(section)+'_ages'].values # posterior_predictive

        section_df = sample_df[sample_df['section']==section]

        lo = np.percentile(sec_ages, 2.5, axis=1).flatten()
        hi = np.percentile(sec_ages, 97.5, axis=1).flatten()

        if include_excluded_samples:
            ax.fill_betweenx(section_df['height'],
                             hi,
                             lo,
                             color=(.95,.95,.95),
                             linestyle = 'dotted',
                             label=r'95% envelope')

        else:
            included_idx = ~section_df['Exclude?'].values.astype(bool)
            ax.fill_betweenx(section_df['height'][included_idx],
                             hi[included_idx],
                             lo[included_idx],
                             color=(.95,.95,.95),
                             linestyle='dotted',
                             label=r'95% envelope')

        lo = np.percentile(sec_ages,17,axis=1).flatten()
        hi = np.percentile(sec_ages,100-17,axis=1).flatten()

        if include_excluded_samples:
            ax.fill_betweenx(section_df['height'],
                             hi,
                             lo,
                             color=(.8,.8,.8),
                             linestyle='dashed',
                             label=r'66% envelope')
        else:
            ax.fill_betweenx(section_df['height'][included_idx],
                             hi[included_idx],
                             lo[included_idx],
                             color=(.8,.8,.8),
                             linestyle='dashed',
                             label=r'66% envelope')

        lo = np.percentile(sec_ages,33.5,axis=1).flatten()
        hi = np.percentile(sec_ages,100-33.5,axis=1).flatten()

        if include_excluded_samples:
            ax.fill_betweenx(section_df['height'],
                             hi,
                             lo,
                             color=(.65,.65,.65),
                             linestyle='solid',
                             label=r'33% envelope')
        else:
            ax.fill_betweenx(section_df['height'][included_idx],
                             hi[included_idx],
                             lo[included_idx],
                             color=(.65,.65,.65),
                             linestyle='solid',
                             label=r'33% envelope')

        if plot_samples:
            # calculate MLE
            max_like = np.zeros(sec_ages.shape[0])
            for i in np.arange(sec_ages.shape[0]):
                sample_ages = sec_ages[i,:]
                dx = np.linspace(np.min(sample_ages), np.max(sample_ages), 1000)
                max_like[i] = dx[np.argmax(gaussian_kde(sample_ages, bw_method = 1)(dx))]

            if include_excluded_samples:
                ax.scatter(max_like,
                        section_df['height'],
                        color = cs[section],
                        linewidth = 1,
                        linestyle = 'solid',
                        marker = 'x',
                        s = 10,
                        label = 'Most likely sample age')
            else:
                ax.scatter(max_like[included_idx],
                        section_df['height'][included_idx],
                        color = cs[section],
                        linewidth = 1,
                        linestyle = 'solid',
                        marker = 'x',
                        s = 10,
                        label = 'Most likely sample age')

        ax.errorbar(ages_df['age'][ages_df['section']==section],
                     ages_df['height'][ages_df['section']==section],
                     xerr = 2*ages_df['age_std'][ages_df['section']==section],
                     color = cs[section], fmt = 'none', capsize = 3)

        section_ages_df = ages_df[(ages_df['section']==section) & (~ages_df['Exclude?'])]
        ax.scatter(section_ages_df['age'][(~section_ages_df['intermediate intrusive?']) & (~section_ages_df['intermediate detrital?'])],
                    section_ages_df['height'][(~section_ages_df['intermediate intrusive?']) & (~section_ages_df['intermediate detrital?'])],
                    color = cs[section],
                    lw = 0.5,
                    edgecolor = 'k',
                    label = r'Age ($\pm2\sigma$)')


        if section_ages_df['age'][section_ages_df['intermediate intrusive?']].shape[0] > 0:
            ax.scatter(section_ages_df['age'][section_ages_df['intermediate intrusive?']],
                        section_ages_df['height'][section_ages_df['intermediate intrusive?']],
                       marker = 's',
                       color = cs[section],
                       edgecolor = 'k',
                       lw = 0.5,
                        label = r'Intrusive Age ($\pm2\sigma$)')

        if section_ages_df['age'][section_ages_df['intermediate detrital?']].shape[0] > 0:
            ax.scatter(section_ages_df['age'][section_ages_df['intermediate detrital?']],
                        section_ages_df['height'][section_ages_df['intermediate detrital?']],
                        marker = '^',
                        color = cs[section],
                        edgecolor = 'k',
                        lw = 0.5,
                        label = r'Detrital Age ($\pm2\sigma$)')

        if legend:
            plt.legend(loc = 'upper right',
                             bbox_to_anchor=(1, 1),
                             markerfirst = False,
                             frameon = False,
                             fontsize = 8)

        ax.axis('tight')
        ax.set_xlabel('Age (Ma)', fontsize = fs)
        ax.set_ylabel('Height (m)', fontsize = fs)
        ax.tick_params(bottom = True, top = True, left = True, right = True, direction = 'in', labelsize = fs)
        ax.set_title(section, fontsize = fs)

    fig.tight_layout()

    plt.subplots_adjust(wspace=0.35)

    return fig



[docs]
def section_proxy_signal(full_trace, sample_df, ages_df, include_radiometric_ages = False, plot_constraints = False, plot_mle = False, yax = 'height', legend = False, cmap = 'Spectral', **kwargs):
    """
    Map the posterior proxy signal back to height in each section (using its most likely posterior age model), and plot alongside the proxy observations (plotted by most likely posterior age).

    .. plot::

       from stratmc.data import load_object, load_trace
       from stratmc.plotting import section_proxy_signal

       full_trace = load_trace('examples/example_docs_trace')
       example_sample_path = 'examples/example_sample_df'
       example_ages_path = 'examples/example_ages_df'
       sample_df = load_object(example_sample_path)
       ages_df = load_object(example_ages_path)

       section_proxy_signal(full_trace, sample_df, ages_df)

       plt.show()

    Parameters
    ----------
    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    sample_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing proxy data for all sections.

    ages_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing age constraints for all sections.

    include_radiometric_ages: bool, optional
        Whether to consider radiometric ages in the posterior age model for each section. Defaults to ``False``.

    plot_constraints: bool, optional
        Plot age constraints for each section as dashed lines. Defaults to ``False``.

    plot_mle: bool, optional
        Plot the maximum likelihood estimate for the proxy signal as a line. Defaults to ``False``.

    yax: str, optional
        Scale for the y-axis ('height' or 'age'). Defaults to 'height'.

    legend: bool, optional
        Generate a legend. Defaults to ``True``.

    cmap: str, optional
        Name of seaborn color palette to use for sections. Defaults to 'Spectral'.

    proxy: str, optional
        Tracer to plot; only required if more than one proxy was included in the inference.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with inferred proxy signal mapped to height in each section.

    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']

    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'proxy' in kwargs:
        proxy = kwargs['proxy']

    else:
        proxy = proxies[0]

    if 'sections' in kwargs:
        sections = list(kwargs['sections'])
    else:
        # only want to map signal back to sections that actually have data for this proxy
        sections = np.unique(sample_df.dropna(subset = [proxy], how = 'all')['section'])

    sample_df, ages_df = clean_data(sample_df, ages_df, proxies, sections)

    cs = {}
    pal = sns.color_palette(cmap, n_colors=len(sections))

    N = len(sections)
    cols = np.min([N, 4])
    rows = int(math.ceil(N/cols))

    gs = gridspec.GridSpec(rows, cols)
    fig = plt.figure(figsize = (2.5 * cols, 4*rows))

    proxy_pred = az.extract(full_trace.posterior_predictive)['f_pred_' + proxy].values
    ages_new = full_trace.X_new.X_new.values

    if plot_mle:
        # calculate MLE
        dy = np.linspace(np.min(proxy_pred), np.max(proxy_pred), 200)
        max_like = np.zeros(ages_new.size)
        for i in np.arange(ages_new.size):
            time_slice = proxy_pred[i,:]
            max_like[i] = dy[np.argmax(gaussian_kde(time_slice, bw_method = 1)(dy))]
        max_like = gaussian(max_like, 2)

    mapped_age_models = map_ages_to_section(full_trace, sample_df, ages_df, sections = sections, include_radiometric_ages = include_radiometric_ages)

    for n in range(N):
        section = sections[n]
        cs[section] = pal[n]
        ax = fig.add_subplot(gs[n])

        ax.tick_params(bottom = True, top = False, left = True, right = False, direction = 'out', labelsize = fs, width = 1.5)

        ax.set_title(section, fontsize = fs)

        # calculate maximum likelihood age for each sample
        section_df = sample_df[sample_df['section']==section]

        proxy_idx = (~np.isnan(section_df[proxy])) & (~section_df['Exclude?'].values.astype(bool))
        excluded_idx = (~np.isnan(section_df[proxy])) & (section_df['Exclude?'])

        section_age_model = mapped_age_models[mapped_age_models['section']==section]
        section_age_vec = section_age_model['age'].values
        above = (ages_new <= np.max(section_age_vec))
        below = (ages_new >= np.min(section_age_vec))

        age_idx = above & below

        # plot data
        if yax == 'height':

            # samples included in inference
            ax.scatter(section_df[proxy].values[proxy_idx],
                        section_df['height'].values[proxy_idx],
                        color = cs[section],
                        edgecolor = 'k',
                       zorder = 3)

            # excluded samples
            ax.scatter(section_df[proxy].values[excluded_idx],
                        section_df['height'].values[excluded_idx],
                        color = 'none',
                        edgecolor = cs[section],
                       zorder = 4)

            # plot posterior proxy signal
            hi = np.percentile(proxy_pred, 97.5, axis=1).flatten()
            lo = np.percentile(proxy_pred, 2.5, axis=1).flatten()

            ax.fill_betweenx(section_age_model['interpolated height'],
                             hi[age_idx],
                             lo[age_idx],
                             color=(.95,.95,.95),
                             label='95% envelope',
                             linestyle = 'dotted',
                             lw = 1.5,
                             edgecolor = 'k',
                             zorder = 0)

            hi = np.percentile(proxy_pred, 100-17, axis=1).flatten()
            lo = np.percentile(proxy_pred, 17, axis=1).flatten()

            ax.fill_betweenx(section_age_model['interpolated height'],
                             hi[age_idx],
                             lo[age_idx],
                             color=(.8,.8,.8),
                             label='66% envelope',
                             linestyle = 'dashed',
                             lw = 1.5,
                             edgecolor = 'k',
                             zorder = 1)

            hi = np.percentile(proxy_pred, 100-33.5, axis=1).flatten()
            lo = np.percentile(proxy_pred, 33.5, axis=1).flatten()

            ax.fill_betweenx(section_age_model['interpolated height'],
                             hi[age_idx],
                             lo[age_idx],
                             color=(.65,.65,.65),
                             label='33% envelope',
                             linestyle = 'solid',
                             lw = 1.5,
                             edgecolor = 'k',
                             zorder = 2)

            if plot_mle:
                ax.plot(max_like[age_idx],
                        section_age_model['interpolated height'],
                        color = 'k',
                        lw = 2,
                        label = 'Most likely',
                        zorder = 3)

            if plot_constraints:
                section_ages_df = ages_df[(ages_df['section']==section) & (~ages_df['Exclude?'])]
                for h in section_ages_df['height']:
                    ax.axhline(h, color = cs[section], linestyle = 'dashed', zorder = -1)

            if proxy == 'd13c':
                ax.set_xlabel(r'$\delta^{13}$C$_{\mathrm{carb}}$ (‰)', fontsize = fs)

            else:
                ax.set_xlabel(proxy, fontsize = fs)


        elif yax == 'age':

            # plot posterior proxy signal
            hi = np.percentile(proxy_pred, 97.5, axis=1).flatten()
            lo = np.percentile(proxy_pred, 2.5, axis=1).flatten()

            ax.fill_betweenx(ages_new[age_idx],
                             hi[age_idx],
                             lo[age_idx],
                             color=(.95,.95,.95),
                             label='95% envelope',
                             linestyle = 'dotted',
                             lw = 1.5,
                             edgecolor = 'k',
                             zorder = 0)

            hi = np.percentile(proxy_pred, 100-17, axis=1).flatten()
            lo = np.percentile(proxy_pred, 17, axis=1).flatten()

            ax.fill_betweenx(ages_new[age_idx],
                             hi[age_idx],
                             lo[age_idx],
                             color=(.8,.8,.8),
                             label='66% envelope',
                             linestyle = 'dashed',
                             lw = 1.5,
                             edgecolor = 'k',
                             zorder = 1)

            hi = np.percentile(proxy_pred, 100-33.5, axis=1).flatten()
            lo = np.percentile(proxy_pred, 33.5, axis=1).flatten()

            ax.fill_betweenx(ages_new[age_idx],
                             hi[age_idx],
                             lo[age_idx],
                             color=(.65,.65,.65),
                             label='33% envelope',
                             linestyle = 'solid',
                             lw = 1.5,
                             edgecolor = 'k',
                             zorder = 2)

            if plot_mle:
                ax.plot(max_like[age_idx],
                        ages_new[age_idx],
                        color = 'k',
                        lw = 2,
                        label = 'Most likely',
                        zorder = 3)

            sec_ages = az.extract(full_trace.posterior)[str(section)+'_ages'].values

            max_like_samples = np.zeros(sec_ages.shape[0])
            for i in np.arange(sec_ages.shape[0]):
                sample_ages = sec_ages[i,:]
                dx = np.linspace(np.min(sample_ages), np.max(sample_ages), 1000)
                max_like_samples[i] = dx[np.argmax(gaussian_kde(sample_ages, bw_method = 1)(dx))]

            ax.scatter(section_df[proxy].values[proxy_idx],
                        max_like_samples[proxy_idx],
                        color = cs[section],
                        edgecolor = 'k',
                       zorder = 4)

            ax.scatter(section_df[proxy].values[excluded_idx],
                        max_like_samples[excluded_idx],
                        color = 'none',
                        edgecolor = cs[section],
                       zorder = 5)


            if plot_constraints:
                section_ages_df = ages_df[(ages_df['section']==section)  & (~ages_df['Exclude?'])]
                for age in section_ages_df['age']:
                    ax.axhline(age, color = cs[section], linestyle = 'dashed', zorder = -1)

            ax.invert_yaxis()

            if proxy == 'd13c':
                ax.set_xlabel(r'$\delta^{13}$C$_{\mathrm{carb}}$ (‰)', fontsize = fs)

            else:
                ax.set_xlabel(proxy, fontsize = fs)


        if (n == 0) | ((n % 4) == 0):
            if yax == 'height':
                ax.set_ylabel('Height (m)', fontsize = fs)

            elif yax == 'age':
                ax.set_ylabel('Age (Ma)', fontsize = fs)

        if (n == 0) and legend:
            ax.legend()

    fig.tight_layout()

    return fig




[docs]
def covariance_hyperparameters(full_trace, figsize = (4, 3.5), **kwargs):

    """
    Plot prior and posterior distributions for the lengthscale (:math:`\\ell`) and variance (:math:`\\sigma`) hyperparameters of the :class:`pymc.gp.cov.ExpQuad <pymc.gp.cov.ExpQuad>` Gaussian process covariance kernel:

    .. math::

        k(x, x') = \\sigma^2 \\mathrm{exp} \\left[-\\frac{(x - x')^2}{2 \\ell^2} \\right]

    .. plot::

       from stratmc.data import load_trace
       from stratmc.plotting import covariance_hyperparameters

       full_trace = load_trace('examples/example_docs_trace')

       covariance_hyperparameters(full_trace)

       plt.show()

    Parameters
    ----------

    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    proxy: str, optional
        Tracer to plot model parameters for (each proxy has a different covariance kernel); only required if more than one proxy was included in the inference.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with prior and posterior model parameters.
    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']

    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'proxy' in kwargs:
        proxy = kwargs['proxy']

    else:
        proxy = proxies[0]

    fig, ax = plt.subplots(2, figsize = figsize)

    sns.kdeplot(full_trace.prior['gp_ls_' + proxy].values.ravel(),
                color='indianred',
                label = 'Prior',
                ax = ax[0],
                cut = 0)

    sns.kdeplot(full_trace.posterior['gp_ls_' + proxy].values.ravel(),
                color = 'indianred',
                label = 'Posterior',
                fill = True,
                edgecolor = 'None',
                ax = ax[0],
                cut = 0)

    ax[0].set_xlabel('Lengthscale', fontsize=fs)
    ax[0].ticklabel_format(axis="x", style="sci", scilimits=(0,0))
    ax[0].tick_params(labelsize=fs)
    ax[0].set_ylabel("")
    ax[0].set_yticks([])
    ax[0].ticklabel_format(style='plain')
    ax[0].legend()

    sns.kdeplot(full_trace.prior['gp_var_' + proxy].values.ravel(),
                color='#87BED5',
                label = 'prior',
                ax = ax[1],
                cut = 0)

    sns.kdeplot(full_trace.posterior['gp_var_' + proxy].values.ravel(),
                color = '#87BED5',
                label = 'posterior',
                fill = True,
                edgecolor = 'None',
                ax = ax[1],
                cut = 0)

    ax[1].set_xlabel('Variance', fontsize=fs)
    ax[1].ticklabel_format(axis="x", style="sci", scilimits=(0,0))
    ax[1].tick_params(labelsize=fs)
    ax[1].set_ylabel("")
    ax[1].set_yticks([])
    ax[1].ticklabel_format(style='plain')
    ax[1].legend()

    fig.tight_layout()

    return fig



[docs]
def section_summary(sample_df, ages_df, full_trace, section, plot_excluded_samples = False, plot_noise_prior = False, plot_offset_prior = False, include_age_constraints_sedrate = True, figsize = (8, 9)):

    """
    For a given section, plot posterior estimates of sample age, sedimentation rate, noise, and offset. Noise and offset terms must be either per-section or global; to plot per-sample noise and offset terms, use :py:meth:`noise_summary() <stratmc.plotting>` and :py:meth:`offset_summary() <stratmc.plotting>`.

    .. plot::

       from stratmc.data import load_object, load_trace
       from stratmc.plotting import section_summary

       full_trace = load_trace('examples/example_docs_trace')
       example_sample_path = 'examples/example_sample_df'
       example_ages_path = 'examples/example_ages_df'
       sample_df = load_object(example_sample_path)
       ages_df = load_object(example_ages_path)

       section_summary(sample_df, ages_df, full_trace, '1')

       plt.show()

    Parameters
    ----------
    sample_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing proxy data for all sections.

    ages_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing age constraints for all sections.

    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    section: str
        Name of target section.

    plot_excluded_samples: bool, optional
        Plot age estimates for proxy observations that were excluded from the inference (``Exclude?`` is ``True`` in ``sample_df``). Defaults to ``False``.

    plot_noise_prior: bool, optional
        Plot prior distribution for noise term. Defaults to ``False``.

    plot_offset_prior: bool, optional
        Plot prior distribution for offset term. Defaults to ``False``.

    include_age_constraints_sedrate: bool, optional
        Include age constraints in sedimentation rate calculations. Defaults to ``True``.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure summarizing posterior sample ages, sedimentation rate, and posterior noise and offset terms for the input section.
    """

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])


    if str(section) + '_section_noise_' + proxies[0] in list(full_trace.posterior.keys()):
        noise_type = 'section'

    else:
        noise_type = 'groups'

    if str(section) + '_section_offset_' + proxies[0] in list(full_trace.posterior.keys()):
        offset_type = 'section'

    else:
        group_offset_vars = [
        l
        for l in list(full_trace["posterior"].data_vars.keys())
        if f"{'_group_offset_'}" in l
        ]

        if len(group_offset_vars) > 0:
            offset_type = 'groups'
        else:
            offset_type = 'none'

    if (noise_type == 'none') and (offset_type == 'none'):
        n_rows = 3

    elif ((noise_type != 'none') or (offset_type != 'none')) and not ((noise_type != 'none') and (offset_type != 'none')):
        n_rows = 4

    elif ((noise_type != 'none') and (offset_type != 'none')):
        n_rows = 5

    sample_df, ages_df = clean_data(sample_df, ages_df, proxies, [section])

    fig, ax = plt.subplots(n_rows, 1, figsize = figsize, constrained_layout=True, gridspec_kw={'height_ratios': [1, 1, 1.5] + [0.5] * (n_rows - 3)})

    # link axes with age on the x-axis
    ax[0].sharex(ax[1])
    ax[2].sharex(ax[1])

    pal = sns.color_palette("flare", n_colors = np.mean(az.extract(full_trace.posterior)[str(section)+'_ages'].values, axis = 1).size, desat=0.8)
    sns.set_palette(pal)

    samples = np.mean(az.extract(full_trace.posterior)[str(section)+'_ages'], axis = 1).size # posterior_predictive

    sec_ages = az.extract(full_trace.posterior)[str(section)+'_ages'].values # posterior_predictive

    section_df = sample_df[sample_df['section']==section]

    excluded_idx = section_df['Exclude?'].values

    for i in range(samples):
        if not excluded_idx[i]:
            sns.kdeplot(sec_ages[i,:].ravel().astype(float),
                        label = 'posterior',
                        fill = True,
                        edgecolor = None,
                        color = pal[i],
                        alpha = 0.3,
                        ax = ax[0],
                        cut = 0)
        else:
            if plot_excluded_samples:
                sns.kdeplot(sec_ages[i,:].ravel().astype(float),
                            label = 'posterior',
                            fill = True,
                            edgecolor = None,
                            color = pal[i],
                            alpha = 0.3,
                            ax = ax[0],
                            cut = 0)

    yl = ax[0].get_ylim()

    ax[0].vlines(ages_df['age'][ages_df['section']==section],
                 yl[0],
                 yl[1],
                 color = '#8c8684',
                 linestyle = 'dashed',
                 zorder = 3,
                 label = 'Age constraint')

    for i in range(len(ages_df['age'][ages_df['section']==section])):
        ax[0].axvspan(ages_df['age'][ages_df['section']==section].values[i] - ages_df['age_std'][ages_df['section']==section].values[i],
                  ages_df['age'][ages_df['section']==section].values[i] + ages_df['age_std'][ages_df['section']==section].values[i],
                    alpha=0.5,
                    color ='#8c8684')

    ax[0].invert_xaxis()

    ax[0].set_ylim(yl)

    ax[0].set_ylabel('Sample ages', fontsize = 12)
    ax[0].set_yticks([])

    # age constraint posteriors
    constraint_ages = az.extract(full_trace.posterior)[str(section)+'_radiometric_age'].values
    prior_constraint_ages = az.extract(full_trace.prior)[str(section)+'_radiometric_age'].values
    samples = np.mean(constraint_ages, axis = 1).size

    pal = sns.color_palette('crest',n_colors=samples)

    for i in range(samples):
        if i == 0:
            post_label = 'Posterior'
            prior_label = 'Prior'
        else:
            post_label = '_nolegend'
            prior_label = '_nolegend'
        sns.kdeplot(constraint_ages[i,:].ravel(),
                    label = post_label, fill = True, color = pal[i], edgecolor='None', alpha = 0.3, ax = ax[1], cut = 0)
        sns.kdeplot(prior_constraint_ages[i,:].ravel(),
                    label = prior_label, color = pal[i], fill = False, ax = ax[1], cut = 0)

    ax[1].set_ylabel('Age constraints', fontsize = 12)
    ax[1].set_yticks([])

    ax[1].legend()

    # sedimentation rate
    age_bins = 50
    rate_bins = 50

    rate_df = accumulation_rate(full_trace, sample_df, ages_df, sections = section, method = 'successive', include_age_constraints = include_age_constraints_sedrate)

    age_bin_edges = np.linspace(np.min(rate_df['top_age']), np.max(rate_df['base_age']), age_bins)

    rate_bin_edges = np.logspace(np.log10(rate_df['rate'].min()), np.log10(rate_df['rate'].max()), rate_bins)

    # get age bin centers
    age_centers = (age_bin_edges[:-1] + age_bin_edges[1:]) / 2

    age_vec = []
    rate_vec = []

    for rate, base, top in zip(rate_df['rate'], rate_df['base_age'], rate_df['top_age']):
        for i in np.arange(len(age_bin_edges)-1):
            center = age_centers[i]
            current_bin = pd.Interval(age_bin_edges[i], age_bin_edges[i+1], closed = 'left')
            current_sample_pair = pd.Interval(top, base, closed = 'right')
            result = current_bin.overlaps(current_sample_pair)
            if result:
                age_vec.append(center)
                rate_vec.append(rate)

    H, xedges, yedges = np.histogram2d(age_vec, rate_vec, bins=[age_bin_edges, rate_bin_edges], density = False)

    H[H == 0] = np.nan
    H = H/np.nansum(H, axis=1,keepdims=1) # each column (age bin) should sum to 1

    m = ax[2].pcolormesh(xedges, yedges, H.T, cmap = 'jet')

    ax[2].set_yscale('log')
    ax[2].set_ylabel('LOG$_{10}$ (Accumulation rate [m/Myr])', fontsize = 12)

    ax[2].set_xlabel('Age (Ma)', fontsize = 12)

    cb = plt.colorbar(m)
    cb.set_label(label='Probability density', fontsize = 12)

    # noise posteriors
    if noise_type != 'none':

        if noise_type == 'section':
            pal = sns.color_palette("deep", n_colors = len(proxies))

        if noise_type == 'groups':
            palettes = ['crest', 'flare', 'Purples', 'Grays', 'Reds', 'Greens', 'YlOrBr', 'Oranges']

        for i in np.arange(len(proxies)):
            proxy = proxies[i]

            if noise_type == 'section':

                posterior_noise_dist = az.extract(full_trace.posterior)[str(section) + '_section_noise_' + proxy].values.ravel()
                prior_noise_dist = az.extract(full_trace.prior)[str(section) + '_section_noise_' + proxy].values.ravel()

                sns.kdeplot(posterior_noise_dist,
                            fill = True,
                            ax = ax[3],
                            edgecolor='none',
                            color = pal[i],
                            alpha = 0.3,
                            label = proxy + ' posterior',
                            cut = 0)

                if plot_noise_prior:
                    sns.kdeplot(prior_noise_dist,
                                fill = False,
                                ax = ax[3],
                                color = pal[i],
                                alpha = 0.3,
                                label = proxy + ' prior',
                                cut = 0)

            elif noise_type == 'groups':

                noise_vars = [
                l
                for l in list(full_trace["posterior"].data_vars.keys())
                if f"{'_group_noise_' + str(proxy)}" in l
                ]

                pal = sns.color_palette(palettes[i], n_colors = len(noise_vars))

                for j, var in enumerate(noise_vars):
                    posterior_noise_dist =  az.extract(full_trace.posterior)[var].values.ravel()

                    sns.kdeplot(posterior_noise_dist,
                                    fill = True,
                                    ax = ax[3],
                                    color = pal[j],
                                    alpha = 0.3,
                                    label = var + ' posterior',
                                    cut = 0
                                    )

                    if plot_noise_prior:
                        prior_noise_dist =  az.extract(full_trace.prior)[var].values.ravel()

                        sns.kdeplot(prior_noise_dist,
                                    fill = False,
                                    ax = ax[3],
                                    color = pal[j],
                                    alpha = 0.3,
                                    label = var + ' prior',
                                    cut = 0
                                    )

                if (len(proxies) > 1) | (noise_type == 'groups'):
                    ax[3].legend()

        ax[3].set_xlabel('Noise', fontsize = 12)
        ax[3].set_yticks([])
        ax[3].set_ylabel(None)

    # offset posteriors
    if offset_type != 'none':

        if offset_type == 'section':
            pal = sns.color_palette("deep", n_colors = len(proxies))

        if offset_type == 'groups':
            palettes = ['crest', 'flare', 'Purples', 'Grays', 'Reds', 'Greens', 'YlOrBr', 'Oranges']

        for i in np.arange(len(proxies)):
            proxy = proxies[i]

            if offset_type == 'section':
                posterior_offset_dist = az.extract(full_trace.posterior)[str(section) + '_section_offset_' + proxy].values.ravel()

                sns.kdeplot(posterior_offset_dist,
                                fill = True,
                                ax = ax[n_rows - 1],
                                edgecolor='none',
                                color = pal[i],
                                alpha = 0.3,
                                label = proxy + ' posterior',
                                cut = 0)

                if plot_offset_prior:
                    prior_offset_dist = az.extract(full_trace.prior)[str(section) + '_section_offset_' + proxy].values.ravel()

                    sns.kdeplot(prior_offset_dist,
                                    fill = False,
                                    ax = ax[n_rows - 1],
                                    color = pal[i],
                                    alpha = 0.3,
                                    label = proxy + ' prior',
                                    cut = 0)

            elif offset_type == 'groups':

                offset_vars = [
                l
                for l in list(full_trace["posterior"].data_vars.keys())
                if f"{'_group_offset_' + str(proxy)}" in l
                ]

                pal = sns.color_palette(palettes[i], n_colors = len(offset_vars))

                for j, var in enumerate(offset_vars):

                    posterior_offset_dist =  az.extract(full_trace.posterior)[var].values.ravel()

                    sns.kdeplot(posterior_offset_dist,
                                fill = True,
                                ax = ax[n_rows - 1],
                                edgecolor='none',
                                color = pal[j],
                                alpha = 0.3,
                                label = var + ' posterior',
                                cut = 0)


                    if plot_offset_prior:
                        prior_offset_dist =  az.extract(full_trace.prior)[var].values.ravel()

                        sns.kdeplot(prior_offset_dist,
                                fill = False,
                                ax = ax[n_rows - 1],
                                color = pal[j],
                                alpha = 0.3,
                                label = var + ' prior',
                                cut = 0)

            if (len(proxies) > 1) | (offset_type == 'groups'):
                    ax[n_rows - 1].legend()

            ax[n_rows - 1].set_xlabel('Offset', fontsize = 12)
            ax[n_rows - 1].set_yticks([])
            ax[n_rows - 1].set_ylabel(None)

    plt.setp(ax[0].get_xticklabels(), visible=False)
    plt.setp(ax[1].get_xticklabels(), visible=False)

    for axis in ax.ravel():
        axis.tick_params(bottom = True, top = False, left = False, right = False, direction = 'in', labelsize = 12)

    fig.suptitle('Section: ' + str(section), fontsize = 14)

    return fig



[docs]
def noise_summary(full_trace, **kwargs):

    """
    Plot posterior noise distributions for each section or group of samples (depending on ``noise_type`` used in :py:meth:`build_model() <stratmc.model.build_model>`) for a given proxy. If multiple proxies were included in the inference, pass a ``proxy`` argument to specify which noise terms to plot.

    .. plot::

       from stratmc.data import load_object, load_trace
       from stratmc.plotting import noise_summary

       full_trace = load_trace('examples/example_docs_trace')
       example_sample_path = 'examples/example_sample_df'
       sample_df = load_object(example_sample_path)
       sections = np.unique(sample_df['section'].values)

       noise_summary(full_trace)

       plt.show()

    Parameters
    ----------

    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    proxy: str, optional
        Target proxy; only required if more than one proxy was included in the inference.


    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with noise distributions for each section or group of samples.
    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'proxy' in kwargs:
        proxy = kwargs['proxy']
    else:
        proxy = proxies[0]

    section_noise_vars = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if f"{'_section_noise_' + str(proxy)}" in l
            ]

    if len(section_noise_vars) > 0:
        noise_vars = section_noise_vars

    else:
        noise_vars = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if f"{'_group_noise_' + str(proxy)}" in l
            ]

        if len(noise_vars) == 0:
            sys.exit(f"No noise terms in the model")

    N = len(noise_vars)
    cols = int(np.min([4, N]))

    pal = sns.color_palette("deep", n_colors = len(noise_vars))

    rows = int(math.ceil(N/cols))

    gs = gridspec.GridSpec(rows, cols)
    fig = plt.figure(figsize = (10, 2.5*rows))

    for n in range(N):

        ax = fig.add_subplot(gs[n])

        var = noise_vars[n]

        ax.set_title(str(var), fontsize = fs)

        posterior_noise_dist = full_trace.posterior[var].values.ravel()

        sns.kdeplot(posterior_noise_dist,
                    ax = ax,
                    fill = True,
                    edgecolor='None',
                    color = pal[n],
                    alpha = 0.5,
                    label = proxy,
                    cut = 0)

        ax.set_ylabel('')
        ax.set_yticks([])
        ax.tick_params(labelsize = fs)

    fig.supxlabel('Noise (%s)' % proxy, fontsize = fs)

    fig.tight_layout()

    plt.subplots_adjust(wspace=0.15, hspace=0.4)

    return fig




[docs]
def offset_summary(full_trace, **kwargs):

    """

    Plot posterior offset distributions for each section or group of samples (depending on ``offset_type`` used in :py:meth:`build_model() <stratmc.model.build_model>`). If multiple proxies were included in the inference, pass a ``proxy`` argument to specify which offset terms to plot.

    .. plot::

       from stratmc.data import load_object, load_trace
       from stratmc.plotting import offset_summary

       full_trace = load_trace('examples/example_docs_trace')
       example_sample_path = 'examples/example_sample_df'
       sample_df = load_object(example_sample_path)

       offset_summary(full_trace)

       plt.show()

    Parameters
    ----------

    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    proxy: str, optional
        Target proxy; only required if more than one proxy was included in the inference.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with offset distributions for each section or group of samples.
    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'proxy' in kwargs:
        proxy = kwargs['proxy']
    else:
        proxy = proxies[0]

    section_offset_vars = [
        l
        for l in list(full_trace["posterior"].data_vars.keys())
        if f"{'_section_offset_' + str(proxy)}" in l
        ]

    if len(section_offset_vars) > 0:
        offset_vars = section_offset_vars

    else:
        offset_vars = [
        l
        for l in list(full_trace["posterior"].data_vars.keys())
        if f"{'_group_offset_' + str(proxy)}" in l
        ]

        if len(offset_vars) == 0:
            sys.exit(f"No offset terms in the model")


    N = len(offset_vars)
    cols = int(np.min([4, N]))

    pal = sns.color_palette("deep", n_colors = len(offset_vars))

    rows = int(math.ceil(N/cols))

    gs = gridspec.GridSpec(rows, cols)
    fig = plt.figure(figsize = (10, 2.5*rows))

    for n in range(N):
        ax = fig.add_subplot(gs[n])

        var = offset_vars[n]

        ax.set_title(var, fontsize = fs)

        # shape = samples x draws
        posterior_offset_dist = az.extract(full_trace.posterior)[var].values.ravel()

        sns.kdeplot(posterior_offset_dist,
                        ax = ax,
                        fill = True,
                        edgecolor = 'None',
                        color = pal[n],
                        alpha = 0.3,
                        label = var,
                        cut = 0
                        )

        ax.set_ylabel('')
        ax.set_yticks([])
        ax.tick_params(labelsize = fs)


    fig.supxlabel('Offset (%s)' % proxy, fontsize = fs)

    fig.tight_layout()

    plt.subplots_adjust(wspace=0.15, hspace=0.4)

    return fig



[docs]
def section_proxy_residuals(full_trace, sample_df, legend = True, cmap = 'Spectral', plot_mle = False, include_excluded_samples = False, **kwargs):
    """

    Plot the residuals between the observed proxy values for each section and the inferred proxy signal (using the posterior section age models to map the signal back to height in section). Use to check for stratigraphic trends in the residuals, which may give insight to the processes that cause noisy sections to deviate from the inferred common signal. If multiple proxies were included in the inference, pass a ``proxy`` argument.

    .. plot::

       from stratmc.data import load_object, load_trace
       from stratmc.plotting import section_proxy_residuals

       full_trace = load_trace('examples/example_docs_trace')
       example_sample_path = 'examples/example_sample_df'
       sample_df = load_object(example_sample_path)

       section_proxy_residuals(full_trace, sample_df)

       plt.show()

    Parameters
    ----------

    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    sample_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing proxy data for all sections.

    legend: bool, optional
        Generate a legend. Defaults to ``True``.

    cmap: str, optional
        Name of seaborn color palette to use for sections. Defaults to 'Spectral'.

    plot_mle: bool, optional
        Plot the maximum likelihood estimate as a line. Defaults to ``False``.

    proxy: str, optional
        Target proxy; only required if more than one proxy was included in the inference.

    include_excluded_samples: bool, optional
        Whether to plot the residuals for samples that were excluded from the inference (``Exclude?`` is ``True`` in ``sample_df``). Defaults to ``False``.


    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with residuals for each section.
    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'proxy' in kwargs:
        proxy = kwargs['proxy']

    else:
        proxy = proxies[0]

    if 'sections' in kwargs:
        sections = list(kwargs['sections'])
    else:
        sections = np.unique(sample_df[~np.isnan(sample_df[proxy])]['section'])


    cs = {}
    pal = sns.color_palette(cmap, n_colors=len(sections))

    N = len(sections)
    cols = np.min([N, 4])
    rows = int(math.ceil(N/cols))

    gs = gridspec.GridSpec(rows, cols)
    fig = plt.figure(figsize = (2.5 * cols, 4*rows))

    proxy_pred = az.extract(full_trace.posterior_predictive)['f_pred_' + proxy].values
    ages_new = full_trace.X_new.X_new.values

    for n in range(N):
        section = sections[n]
        cs[section] = pal[n]
        ax = fig.add_subplot(gs[n])

        ax.set_title(section, fontsize = fs)

        section_sample_df = sample_df[sample_df['section'] == section]

        if include_excluded_samples:
            included_idx = (~np.isnan(section_sample_df[proxy]))
        else:
            included_idx = (~np.isnan(section_sample_df[proxy])) & (~section_sample_df['Exclude?'])

        section_ages = az.extract(full_trace.posterior)[str(section) + '_ages'].values[included_idx, :]

        section_proxy_true = section_sample_df[included_idx][proxy].values
        section_heights = section_sample_df[included_idx]['height'].values

        section_proxy_pred = np.ones((len(section_proxy_true), proxy_pred.shape[1])) * np.nan
        section_residuals =  np.ones((len(section_proxy_true), proxy_pred.shape[1])) * np.nan

        for i in np.arange(proxy_pred.shape[1]):
            section_proxy_pred[:, i] = np.interp(section_ages[:, i], ages_new, proxy_pred[:, i])
            # residual = actual - predicted
            section_residuals[:, i] = section_proxy_true - section_proxy_pred[:, i]

        hi = np.percentile(section_residuals, 97.5, axis = 1).flatten()
        lo = np.percentile(section_residuals, 2.5, axis = 1).flatten()

        ax.fill_betweenx(section_heights,
                             hi,
                             lo,
                             color=(.95,.95,.95),
                             label='95% envelope',
                             linestyle = 'dotted',
                             lw = 1.5,
                             edgecolor = 'k',
                             zorder = 0)

        hi = np.percentile(section_residuals, 100-17, axis=1).flatten()
        lo = np.percentile(section_residuals, 17, axis=1).flatten()

        ax.fill_betweenx(section_heights,
                         hi,
                         lo,
                         color=(.8,.8,.8),
                         label='66% envelope',
                         linestyle = 'dashed',
                         lw = 1.5,
                         edgecolor = 'k',
                         zorder = 1)

        hi = np.percentile(section_residuals, 100-33.5, axis=1).flatten()
        lo = np.percentile(section_residuals, 33.5, axis=1).flatten()

        ax.fill_betweenx(section_heights,
                         hi,
                         lo,
                         color=(.65,.65,.65),
                         label='33% envelope',
                         linestyle = 'solid',
                         lw = 1.5,
                         edgecolor = 'k',
                         zorder = 2)

        if plot_mle:
            # calculate mle
            dy = np.linspace(np.min(section_residuals.ravel()), np.max(section_residuals.ravel()), 200)
            max_like = np.zeros(section_heights.size)
            for i in np.arange(section_heights.size):
                height_slice = section_residuals[i,:]
                max_like[i] = dy[np.argmax(gaussian_kde(height_slice, bw_method = 1)(dy))]

            ax.plot(max_like,
                    section_heights,
                    color = 'k',
                    lw = 2,
                    zorder = 3,
                    label = 'Most likely')

        ax.set_ylabel('Height (m)', fontsize = fs)

        if legend and n == 0:
            ax.legend(loc = 'upper right')

    fig.supxlabel('Residual ' + str(proxy) + ' value (observed - predicted)', fontsize = fs)

    fig.tight_layout()

    return fig



[docs]
def sample_ages(full_trace, sample_df, section, plot_excluded_samples = False, plot_prior = True, cmap = 'viridis'):
    """
    Plot sample age prior and posterior distributions for a given section. Each subplot contains posterior distributions for a different sample.

    .. plot::

       from stratmc.data import load_object, load_trace
       from stratmc.plotting import sample_ages

       full_trace = load_trace('examples/example_docs_trace')
       example_sample_path = 'examples/example_sample_df'
       sample_df = load_object(example_sample_path)
       section = '1'

       sample_ages(full_trace, sample_df, section)

       plt.show()

    Parameters
    ----------
    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of posterior and prior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    sample_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing proxy data for all sections.

    section: str
        Name of target section.

    plot_excluded_samples: bool, optional
        Plot age distributions for proxy observations that were excluded from the inference (``Exclude?`` is ``True`` in ``sample_df``). Defaults to ``False``.

    plot_prior: bool, optional
        Plot prior distributions for sample ages. Defaults to ``True``.

    cmap: str, optional
        Name of seaborn color palette to use for age distributions. Defaults to 'viridis'.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with prior and posterior sample age distributions.
    """

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    sample_df, _ = clean_data(sample_df, None, proxies, [section])

    vals = az.extract(full_trace.posterior)[str(section)+'_ages'].values
    prior_vals = az.extract(full_trace.prior)[str(section)+'_ages'].values

    print(vals.shape)
    print(prior_vals.shape)

    if not plot_excluded_samples:
        included_idx = ~sample_df['Exclude?'].values.astype(bool)
        # shape = (samples x draws)
        vals = vals[included_idx, :]
        prior_vals = prior_vals[included_idx, :]

    else:
        excluded_idx = sample_df['Exclude?'].values

    samples = np.mean(vals, axis = 1).size

    cols = 5
    N = samples
    rows = int(math.ceil(N/cols))

    pal = sns.color_palette(cmap,n_colors=samples)

    gs = gridspec.GridSpec(rows, cols)
    fig = plt.figure(figsize = (11, 1.5*rows))

    excluded_count = 0
    for i in range(samples):
        ax = fig.add_subplot(gs[i])

        if plot_excluded_samples:
            if excluded_idx[i]:
                linestyle = 'dashed'
                edgecolor = pal[i]
                post_label = 'Posterior (excluded sample)'
                prior_label = 'Prior (excluded sample)'
                excluded_count += 1

            else:
                linestyle = 'solid'
                edgecolor = 'none'
                post_label = 'Posterior'
                prior_label = 'Prior'

        else:
            linestyle = 'solid'
            edgecolor = 'none'
            post_label = 'Posterior'
            prior_label = 'Prior'

        sns.kdeplot(vals[i,:].ravel(),
                    label = post_label, fill = True, color = pal[i], edgecolor=edgecolor, linestyle = linestyle, alpha = 0.3, ax = ax, cut = 0)

        if plot_prior:
            sns.kdeplot(prior_vals[i,:].ravel(),
                    label = prior_label, fill = False, color = pal[i], ax = ax, cut = 0)

        ax.tick_params(bottom = True, top = True, left = False, right = False, direction = 'in', labelsize = 12)
        ax.set_ylabel('')
        ax.set_yticklabels([])
        ax.invert_xaxis()

        if (i == 0) | ((excluded_count == 1) and (excluded_idx[i])):
            ax.legend(frameon = False, loc = 'upper left')

    fig.suptitle('Section: ' + section, fontsize = 14)
    fig.supxlabel('Age (Ma)', fontsize = 12)
    fig.tight_layout()

    return fig



[docs]
def sample_ages_per_chain(full_trace, sample_df, section, chains = None, plot_prior = False, plot_excluded_samples = False, legend = True, cmap = 'viridis'):
    """
    Plot sample age posterior distributions for a given section, with separate distributions for each chain. Each subplot contains posterior distributions for a different sample. Use to check for posterior multimodality (in this example, each chain has explored a different mode of the posterior age distributions).

    .. plot::

       from stratmc.data import load_object, load_trace
       from stratmc.plotting import sample_ages_per_chain

       full_trace = load_trace('examples/example_docs_trace')
       example_sample_path = 'examples/example_sample_df'
       sample_df = load_object(example_sample_path)
       section = '1'

       sample_ages_per_chain(full_trace, sample_df, section, chains = [0, 1, 2, 3])

       plt.show()


    Parameters
    ----------
    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    sample_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing proxy data for all sections.

    section: str
        Name of target section.

    chains: list(int) or numpy.array(int); optional
        Indices of chains to include; optional (defaults to all chains).

    plot_prior: bool, optional
        Plot prior distributions for sample ages. Defaults to ``False``.

    plot_excluded_samples: bool, optional
        Plot age distributions for proxy observations that were excluded from the inference (``Exclude?`` is ``True`` in ``sample_df``). Defaults to ``False``.

    legend: bool, optional
        Generate a legend. Defaults to ``True``.

    cmap: str, optional
        Name of seaborn color palette to use for different chains. Defaults to 'viridis'.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with per-chain posterior sample age distributions.

    """
    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    sample_df, _ = clean_data(sample_df, None, proxies, [section])

    # chains x draws x samples
    vals = full_trace.posterior[str(section)+'_ages'].values
    if chains is None:
        chains = np.arange(vals.shape[0])

    prior_vals = full_trace.prior[str(section)+'_ages'].values

    if not plot_excluded_samples:
        included_idx = ~sample_df['Exclude?'].values.astype(bool)
        # shape = (samples x draws)
        vals = vals[:, :, included_idx]
        prior_vals = prior_vals[:, :, included_idx]
        excluded_idx = np.array([False] * vals.shape[2])

    else:
        excluded_idx = sample_df['Exclude?'].values

    samples = vals.shape[2]

    cols = 5
    N = samples
    rows = int(math.ceil(N/cols))

    cs = {}
    pal = sns.color_palette(cmap, n_colors=len(chains))

    gs = gridspec.GridSpec(rows, cols)
    fig = plt.figure(figsize = (11, 1.5*rows))

    excluded_count = 0

    for i in range(samples):

        ax = fig.add_subplot(gs[i])

        if excluded_idx[i]:
            linestyle = 'dashed'
            post_label = 'Posterior (excluded sample)'
            prior_label = 'Prior (excluded sample)'
            excluded_count += 1
            lw = 1
        else:
            lw = 0
            linestyle = 'solid'
            post_label = 'Posterior'
            prior_label = 'Prior'

        for m in range(len(chains)):
            cs[m] = pal[m]

            sns.kdeplot(vals[chains[m], :, i].ravel(),
                        label = post_label + ', chain ' + str(chains[m]), fill = True, color = cs[m], alpha = 0.3, ax = ax, lw = lw, linestyle = linestyle, cut = 0)

        # assumes only 1 prior chain
        if plot_prior:
            sns.kdeplot(prior_vals[:, :, i].ravel(),
                        label = prior_label, fill = False, color = 'k', ax = ax, cut = 0)

        ax.tick_params(bottom = True, top = True, left = False, right = False, direction = 'in', labelsize = 12)
        ax.set_ylabel('')
        ax.set_yticklabels([])
        ax.invert_xaxis()

        if legend:
            if (i == 0) | ((excluded_count == 1) and (excluded_idx[i])):
                ax.legend(frameon = False, loc = 'upper left')

    fig.suptitle('Section: ' + section, fontsize = 14)
    fig.supxlabel('Age (Ma)', fontsize = 12)
    fig.tight_layout()

    return fig



[docs]
def age_constraints(full_trace, section, cmap = 'viridis', **kwargs):
    """

    For a given section, plot prior and posterior age distributions for all depositional age constraints (and limiting age constraints that provide a minimum or maximum age for the entire section). To plot intermediate limiting ages (i.e., detrital and intrusive age constraints in the middle of a section), use :py:meth:`limiting_age_constraints() <stratmc.plotting.limiting_age_constraints>`.

    .. plot::

       from stratmc.data import load_trace
       from stratmc.plotting import age_constraints

       full_trace = load_trace('examples/example_docs_trace')
       section = '1'

       age_constraints(full_trace, section)

       plt.show()

    Parameters
    ----------
    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    section: str
        Name of target section.

    cmap: str, optional
        Name of seaborn color palette to use for age distributions. Defaults to 'viridis'.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with prior and posterior depositional age constraint distributions.
    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    vals = az.extract(full_trace.posterior)[str(section)+'_radiometric_age'].values
    prior_vals = az.extract(full_trace.prior)[str(section)+'_radiometric_age'].values

    # if only 1 age constraint (should only occur if one the min or max age is shared), reshape for plotting
    # (this should actually never happen -- even if shared, the age constraints end up in the section_radiometric_age RV in the model)
    if vals.ndim == 1:
            vals = np.reshape(vals, (1, len(vals)))

    if prior_vals.ndim == 1:
        prior_vals = np.reshape(prior_vals, (1, len(prior_vals)))

    samples = np.mean(vals, axis = 1).size

    cols = np.min([4, samples])
    N = samples
    rows = int(math.ceil(N/cols))

    pal = sns.color_palette(cmap, n_colors=samples)

    gs = gridspec.GridSpec(rows, cols)
    fig = plt.figure(figsize = (11, 2*rows))

    for i in range(samples):
        ax = fig.add_subplot(gs[i])
        sns.kdeplot(vals[i,:].ravel(),
                    label = 'Posterior', fill = True, color = pal[i], edgecolor='None', alpha = 0.3, ax = ax, cut = 0)
        sns.kdeplot(prior_vals[i,:].ravel(),
                    label = 'Prior', fill = False, color = pal[i], ax = ax, cut = 0)

        ax.set_ylabel('')
        ax.set_yticklabels([])

        ax.invert_xaxis()
        ax.tick_params(bottom = True, top = True, left = False, right = False, direction = 'in', labelsize = fs)

        if i == 0:
            ax.legend(frameon = False, loc = 'upper left')

    fig.suptitle('Section: ' + section, fontsize = fs + 2)
    fig.supxlabel('Age (Ma)', fontsize = fs)

    fig.tight_layout()

    return fig



[docs]
def limiting_age_constraints(full_trace, sample_df, ages_df, section, cmap = 'viridis', **kwargs):
    """
    For a given section, plot prior and posterior age distributions for all intermediate limiting (i.e., detrital and intrusive ages in the middle of a section) age constraints. To plot depositional age constraints (and limiting age constraints that provide a minimum or maximum age for the entire section), use :py:meth:`age_constraints() <stratmc.plotting.age_constraints>`.

    .. plot::

        from stratmc.data import load_object, load_trace
        from stratmc.plotting import limiting_age_constraints

        full_trace = load_trace('examples/example_docs_limiting_ages_trace')
        example_sample_path = 'examples/example_sample_df_limiting_ages'
        example_ages_path = 'examples/example_ages_df_limiting_ages'
        sample_df = load_object(example_sample_path)
        ages_df = load_object(example_ages_path)
        section = '1'

        limiting_age_constraints(full_trace, sample_df, ages_df, section)

        plt.show()

    Parameters
    ----------
    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    sample_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing proxy data for all sections.

    ages_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing age constraints for all sections.

    section: str
        Name of target section.

    cmap: str, optional
        Name of seaborn color palette to use for age distributions. Defaults to 'viridis'.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with prior and posterior limiting age constraint distributions.
    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    sample_df, ages_df = clean_data(sample_df, ages_df, proxies, [section])

    detrital_df = ages_df[(ages_df['section']==section) & (ages_df['intermediate detrital?'])]
    intrusive_df =  ages_df[(ages_df['section']==section) & (ages_df['intermediate intrusive?'])]

    detrital_named = []
    if detrital_df.shape[0] > 0:
        # grab names from list of model variables (will only include constriants that aren't shared)
        detrital_vars = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (l[0:len(section)] == section) and (f"{'detrital_age_'}" in l)
            ]

        detrital_named += [False] * len(detrital_vars)

        # if constraints are shared, grab those names too
        detrital_vars += list(detrital_df[detrital_df['shared?']]['name'].values)

        detrital_named += [True] * len(list(detrital_df[detrital_df['shared?']]['name'].values))

    else:
        detrital_vars = []

    intrusive_named = []
    if intrusive_df.shape[0] > 0:

        # grab names from list of model variables (will only include constriants that aren't shared)
        intrusive_vars = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (l[0:len(section)] == section) and (f"{'intrusive_age_'}" in l)
            ]

        intrusive_named += [False] * len(intrusive_vars)

        # if constraints are shared, grab those names too
        intrusive_vars += list(intrusive_df[intrusive_df['shared?']]['name'].values)
        intrusive_named += [True] * len(list(intrusive_df[intrusive_df['shared?']]['name'].values))

    else:
        intrusive_vars = []

    # number of age constraints to plot
    combined_vars = detrital_vars + intrusive_vars
    combined_named = detrital_named + intrusive_named

    if len(combined_vars) == 0:
            sys.exit(f"Section {section} has no intermediate limiting age constraints")

    N = len(combined_vars)

    cols = np.min([4, N])
    rows = int(math.ceil(N/cols))

    pal = sns.color_palette(cmap, n_colors=N)

    gs = gridspec.GridSpec(rows, cols)
    fig = plt.figure(figsize = (11, 2*rows))

    for i in range(N):
        var = combined_vars[i]

        named = combined_named[i]
        if named:
            title = var
        elif i < len(detrital_vars):
            title = 'Detrital ' + str(i)
        else:
            title = 'Intrusive ' + str(i - len(detrital_vars))

        ax = fig.add_subplot(gs[i])

        ax.set_title(title, fontsize = fs)

        # distributions for detrital and intrsive ages are always 1d (different objects created for each constraint)
        vals = az.extract(full_trace.posterior)[var].values
        prior_vals = az.extract(full_trace.prior)[var].values

        sns.kdeplot(vals.ravel(),
                    label = 'Posterior', fill = True, color = pal[i], edgecolor='None', alpha = 0.3, ax = ax, cut = 0)
        sns.kdeplot(prior_vals.ravel(),
                    label = 'Prior', fill = False, color = pal[i], ax = ax, cut = 0)

        ax.set_ylabel('')
        ax.set_yticklabels([])

        ax.invert_xaxis()
        ax.tick_params(bottom = True, top = True, left = False, right = False, direction = 'in', labelsize = fs)

        if i == 0:
            ax.legend(frameon = False, loc = 'upper left')

    fig.suptitle('Section: ' + section, fontsize = fs + 2)
    fig.supxlabel('Age (Ma)', fontsize = fs)

    fig.tight_layout()

    return fig



[docs]
def sadler_plot(full_trace, sample_df, ages_df, method = 'density', duration_bins = 50, rate_bins = 50, scale = 'log', include_age_constraints = True, density_cmap = 'jet', section_cmap = 'Spectral', figsize = (6, 4), **kwargs):
    """
    Plot accumulation rate against duration.

    .. plot::

       from stratmc.data import load_object, load_trace
       from stratmc.plotting import sadler_plot

       full_trace = load_trace('examples/example_docs_trace')
       example_sample_path = 'examples/example_sample_df'
       example_ages_path = 'examples/example_ages_df'
       sample_df = load_object(example_sample_path)
       ages_df = load_object(example_ages_path)

       sadler_plot(full_trace, sample_df, ages_df)

       plt.show()


    Parameters
    ----------

    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    sample_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing proxy data for all sections.

    ages_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing age constraints for all sections.

    method: str, optional
        Plot as a 2D histogram ('density') or as a scatter plot ('scatter'). The density plot will combine data for all sections in ``sections``, while a scatter plot will assign a unique color to each section. Defaults to 'density'.

    duration_bins: int, optional
        Number of bin edges to use for the duration data. Defaults to 50.

    rate_bins: int, optional
        Number of bin edges to use for the rate data. Defaults to 50.

    scale: str, optional
        Scaling for x- and y-axes ('linear' or 'log'). Defaults to 'log'.

    sections: list(str) numpy.array(str), optional
        List of target sections. Defaults to all sections in ``sample_df``.

    include_age_constraints: bool, optional
        Include age constraints in sedimentation rate calculations. Defaults to ``False``.

    density_cmap: str, optional
        Name of matplotlib colormap to use for probability density if ``method`` is 'density'. Defaults to 'jet'.

    section_cmap: str, optional
        Name of seaborn color palette to use for sections if ``method`` is 'scatter'. Defaults to 'Spectral'.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with sediment accumulation rate plotted against duration.

    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'sections' in kwargs:
        sections = list(kwargs['sections'])
    else:
        # any sections that have no relevant proxy observations would not have ages tracked in the trace
        sections = np.unique(sample_df.dropna(subset = proxies, how = 'all')['section'])

    sample_df, ages_df = clean_data(sample_df, ages_df, proxies, sections)

    rate_df = accumulation_rate(full_trace, sample_df, ages_df, method = 'all', sections = sections, include_age_constraints = include_age_constraints)

    dur = rate_df['duration'].values
    rate = rate_df['rate'].values

    fig = plt.figure(figsize = figsize)
    ax = fig.gca()

    if method == 'density':
        if scale == 'log':
            x_bins = np.logspace(np.log10(dur.min()), np.log10(dur.max()), duration_bins)
            y_bins = np.logspace(np.log10(rate.min()), np.log10(rate.max()), rate_bins)

        else:
            x_bins = np.linspace(dur.min(), dur.max(), duration_bins)
            y_bins = np.linspace(rate.min(),rate.max(), rate_bins)

        H, xedges, yedges = np.histogram2d(dur, rate, bins=[x_bins, y_bins], density = False) # don't normalize during this step - calculation not correct if scale = log

        H[H == 0] = np.nan
        H = H/len(dur) # normalize

        m = ax.pcolormesh(xedges, yedges, H.T, cmap = density_cmap)

        cb = plt.colorbar(m)
        cb.set_label(label='Probability density', fontsize = fs)

    elif method == 'scatter':
        pal = sns.color_palette(section_cmap, n_colors = len(sections))
        for i in np.arange(len(sections)):
            section = sections[i]
            ax.scatter(rate_df[rate_df['section']==section]['duration'].values, rate_df[rate_df['section']==section]['rate'].values,
                       s = 5,
                       edgecolor = pal[i],
                       facecolor = 'none',
                      alpha = 0.5,
                      label = section)
        ax.legend()

    if scale == 'log':
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_xlabel('LOG$_{10}$ (Duration [yr])', fontsize = fs)
        ax.set_ylabel('LOG$_{10}$ (Accumulation rate [mm/yr])', fontsize = fs)

    else:
        ax.set_xlabel('Duration (yr)', fontsize = fs)
        ax.set_ylabel('Accumulation rate (mm/yr)', fontsize = fs)

    ax.tick_params(labelsize = fs)

    fig.tight_layout()

    return fig




[docs]
def accumulation_rate_stratigraphy(full_trace, sample_df, ages_df, age_bins = 50, age_bin_edges = [], rate_bins = 50, rate_bin_edges = [], rate_scale = 'log', include_age_constraints = True, cmap = 'jet', figsize = (8, 4), **kwargs):
    """
    Plot the probability density of sediment accumulation rates (calculated between successive samples) through time. Probability density for each age bin sums to 1. Unless a ``sections`` argument is passed, includes accumulation rates for all sections.

    .. plot::

       from stratmc.data import load_object, load_trace
       from stratmc.plotting import accumulation_rate_stratigraphy

       full_trace = load_trace('examples/example_docs_trace')
       example_sample_path = 'examples/example_sample_df'
       example_ages_path = 'examples/example_ages_df'
       sample_df = load_object(example_sample_path)
       ages_df = load_object(example_ages_path)

       accumulation_rate_stratigraphy(full_trace, sample_df, ages_df, sections = ['1'])

       plt.show()

    Parameters
    ----------

    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    sample_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing proxy data for all sections.

    ages_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing age constraints for all sections.

    age_bins: int, optional
        Number of bin edges for age data (if ``age_bin_edges`` not provided). Defaults to 50.

    rate_bins: int, optional
        Number of bin edges for rate data (if ``rate_bin_edges`` not provided). Defaults to 50.

    age_bin_edges: list(float) or numpy.array(float), optional
        List or array of bin edges for the age data.

    rate_bin_edges: list(float) numpy.array(float), optional
        List or array of bin edges for the rate data.

    rate_scale: str, optional
        Scaling for rate ('linear' or 'log'). Defaults to 'log'.

    include_age_constraints: bool, optional
        Include age constraints in sedimentation rate calculations. Defaults to ``True``.

    sections: list(str) or numpy.array(str), optional
        Section(s) to include. If not provided, combines data from all sections in ``sample_df``.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with accumulation rate probability density through time.

    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'sections' in kwargs:
        sections = list(kwargs['sections'])
    else:
        sections = np.unique(sample_df.dropna(subset = proxies, how = 'all')['section'])

    sample_df, ages_df = clean_data(sample_df, ages_df, proxies, sections)

    rate_df = accumulation_rate(full_trace, sample_df, ages_df, sections = sections, method = 'successive', include_age_constraints = include_age_constraints)

    if len(age_bin_edges) == 0:
        age_bin_edges = np.linspace(np.min(rate_df['top_age']), np.max(rate_df['base_age']), age_bins)

    if len(rate_bin_edges) == 0:
        if rate_scale == 'log':
            rate_bin_edges = np.logspace(np.log10(rate_df['rate'].min()), np.log10(rate_df['rate'].max()), rate_bins)
        elif rate_scale == 'linear':
            rate_bin_edges = np.linspace(rate_df['rate'].min(), rate_df['rate'].max(), rate_bins)

    # get age bin centers
    age_centers = (age_bin_edges[:-1] + age_bin_edges[1:]) / 2

    age_vec = []
    rate_vec = []

    for rate, base, top in zip(rate_df['rate'], rate_df['base_age'], rate_df['top_age']):
        for i in np.arange(len(age_bin_edges)-1):
            center = age_centers[i]
            current_bin = pd.Interval(age_bin_edges[i], age_bin_edges[i+1], closed = 'left')
            current_sample_pair = pd.Interval(top, base, closed = 'right')
            result = current_bin.overlaps(current_sample_pair)
            if result:
                age_vec.append(center)
                rate_vec.append(rate)


    fig = plt.figure(figsize = figsize)
    ax = fig.gca()

    H, xedges, yedges = np.histogram2d(age_vec, rate_vec, bins=[age_bin_edges, rate_bin_edges], density = False)

    H[H == 0] = np.nan
    H = H/np.nansum(H, axis=1,keepdims=1) # each column (age bin) should sum to 1

    m = ax.pcolormesh(xedges, yedges, H.T, cmap = cmap)

    if rate_scale == 'log':
        ax.set_yscale('log')
        ax.set_ylabel('LOG$_{10}$ (Accumulation rate [m/Myr])', fontsize = fs)

    elif rate_scale == 'linear':
        ax.set_ylabel('Accumulation rate (m/Myr)', fontsize = fs)

    ax.invert_xaxis()
    ax.set_xlabel('Age (Ma)', fontsize = fs)

    cb = plt.colorbar(m)
    cb.set_label(label='Probability density', fontsize = fs)

    return fig




[docs]
def section_age_range(full_trace, sample_df, ages_df, lower_age, upper_age, legend = True, section_cmap = 'Spectral', **kwargs):
    """
    Plot the stratigraphic interval corresponding to a given age range (based on the posterior section age models). If ``sections`` is not provided, includes each section that overlaps the target age range.

    .. plot::

       from stratmc.data import load_object, load_trace
       from stratmc.plotting import section_age_range

       full_trace = load_trace('examples/example_docs_trace')
       example_sample_path = 'examples/example_sample_df'
       example_ages_path = 'examples/example_ages_df'
       sample_df = load_object(example_sample_path)
       ages_df = load_object(example_ages_path)

       section_age_range(full_trace, sample_df, ages_df, 120, 130)

       plt.show()

    Parameters
    ----------
    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    sample_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing proxy data for all sections.

    ages_df: pandas.DataFrame
        :class:`pandas.DataFrame` containing age constraints for all sections.

    lower_age: float
        Lower bound (youngest) of the target age interval.

    upper_age: float
        Upper bound (oldest) of the target age interval.

    legend: bool, optional
        Generate a legend. Defaults to ``True``.

    cmap: str, optional
        Name of seaborn color palette to use for sections. Defaults to 'Spectral'.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Tracer stratigraphy for each section, with the stratigraphic interval corresponding to the input age range highlighted.

    """

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'proxy' in kwargs:
        proxy = kwargs['proxy']
    else:
        proxy = proxies[0]

    if 'sections' in kwargs:
        sections = list(kwargs['sections'])
    else:
        sections = np.unique(sample_df.dropna(subset = proxies, how = 'all')['section'])

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']

    else:
        fs = 12

    sample_df, ages_df = clean_data(sample_df, ages_df, proxies, sections)

    stats_df = age_range_to_height(full_trace, sample_df, ages_df,lower_age, upper_age, **kwargs)

    sections = stats_df.index.tolist()

    cs = {}
    pal = sns.color_palette(section_cmap, n_colors=len(sections))

    cols = 4
    N = len(sections)
    rows = int(math.ceil(N/cols))

    gs = gridspec.GridSpec(rows, cols)
    fig = plt.figure(figsize = (10, 4.5*rows))

    for n in range(N):
        section = sections[n]
        cs[section] = pal[n]
        ax = fig.add_subplot(gs[n])

        section_df = sample_df[sample_df['section']==section]

        ax.scatter(section_df[proxy], section_df['height'], color = cs[section], edgecolor = 'k', zorder = 3)

        # 0 is the top height, -1 is the bottom height
        ax.axhspan(stats_df.loc[section]['base_2.5'], stats_df.loc[section]['top_97.5'], zorder = 0, color = (0.95, 0.95, 0.95), label='95% envelope')
        ax.axhspan(stats_df.loc[section]['base_16'], stats_df.loc[section]['top_84'], zorder = 1, color = (0.8, 0.8, 0.8), label='68% envelope')
        ax.axhline(stats_df.loc[section]['base_mle'], color = 'darkgray', linestyle = 'solid', zorder = 2)
        ax.axhline(stats_df.loc[section]['top_mle'], color = 'darkgray', linestyle = 'solid', zorder = 2, label = 'Most likely range')

        ax.set_ylabel('Height (m)', fontsize = fs)
        if proxy == 'd13c':
            ax.set_xlabel(r'$\delta^{13}$C$_{\mathrm{carb}} (‰)$', fontsize = fs)
        else:
            ax.set_xlabel(proxy, fontsize = fs)

        ax.set_title(section, fontsize = fs)
        ax.tick_params(labelsize=fs)
        ax.locator_params(axis='x', nbins=5)

        if legend:
            if n == 0:
                ax.legend(loc = 'upper left')

    fig.tight_layout()

    plt.subplots_adjust(wspace=0.4)

    return fig



[docs]
def proxy_data_gaps(full_trace, time_grid = None, yaxis = 'percentage', figsize = (6, 3.5), **kwargs):

    """
    For a set of discrete time bins, shows the number of draws from the posterior where there are no proxy observations (i.e., where there are temporal `gaps` in the proxy data). This plot can be used to determine where additional observations are needed to improve the inference.

    .. plot::

        from stratmc.data import load_trace
        from stratmc.plotting import proxy_data_gaps

        full_trace = load_trace('examples/example_docs_trace')

        proxy_data_gaps(full_trace, time_grid = full_trace.X_new.X_new.values[::2])

        plt.show()

    Parameters
    ----------
    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    time_grid: np.array, optional
        Time bin edges; if not provided, defaults to the ``ages`` array passed to :py:meth:`get_trace() <stratmc.inference.get_trace>`.

    yaxis: str, optional
        Set y-axis to percentage of posterior draws without observations ('percentage') or to the number of posterior draws without observations ('count'). Defaults to 'percentage'.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with bar plot of number of posterior draws with gaps for each time bin.

    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    if time_grid is None:
        time_grid = full_trace.X_new.X_new.values

    age_gaps, grid_centers, grid_widths, n =  find_gaps(full_trace, time_grid = time_grid)

    fig = plt.figure(figsize = figsize)

    ax = fig.gca()

    if yaxis == 'percentage':
        ax.bar(grid_centers, height = 100 * age_gaps/n, width = grid_widths, color = 'lightgray', edgecolor = 'k', lw = 0.5)
        ax.set_ylabel('% solutions without observations', fontsize = fs)

    elif yaxis == 'count':
        ax.bar(grid_centers, height = age_gaps, width = grid_widths, color = 'lightgray', edgecolor = 'k', lw = 0.5)
        ax.set_ylabel('# solutions without observations', fontsize = fs)

    ax.set_xlabel('Age (Ma)', fontsize = fs)
    ax.tick_params(labelsize = fs)

    ax.invert_xaxis()

    fig.tight_layout()

    return fig



[docs]
def proxy_data_density(full_trace, time_grid = None, figsize = (6, 3.5), **kwargs):
    """
    Plot the mean number proxy observations in discrete time bins (averaged over all posterior draws). This plot can be used to determine where proxy observations are relatively sparse, and additional observations may improve the inference.

    .. plot::

        from stratmc.data import load_trace
        from stratmc.plotting import proxy_data_density

        full_trace = load_trace('examples/example_docs_trace')

        proxy_data_density(full_trace, time_grid = full_trace.X_new.X_new.values[::2])

        plt.show()

    Parameters
    ----------
    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    time_grid: np.array, optional
        Time bin edges; if not provided, defaults to the ``ages`` array passed to :py:meth:`get_trace() <stratmc.inference.get_trace>`.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with bar plot of mean number of observations in each time bin.

    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    if time_grid is None:
        time_grid = full_trace.X_new.X_new.values

    sample_counts, grid_edges, n =  count_samples(full_trace, time_grid = time_grid)

    grid_widths = np.diff(grid_edges)

    fig = plt.figure(figsize = figsize)

    ax = fig.gca()

    ax.bar(grid_edges[:-1], height = sample_counts/n, width = grid_widths, align = 'edge', color = 'lightgray', edgecolor = 'k', lw = 0.5)

    ax.set_ylabel('Average # observations', fontsize = fs)
    ax.set_xlabel('Age (Ma)', fontsize = fs)
    ax.tick_params(labelsize = fs)

    ax.invert_xaxis()

    fig.tight_layout()

    return fig



[docs]
def lengthscale_traceplot(full_trace, chains = None, legend = True, figsize = (5, 3.5), **kwargs):
    """
    Generate trace plot (parameter value vs. step in Markov chain) for the :class:`pymc.gp.cov.ExpQuad <pymc.gp.cov.ExpQuad>` covariance kernel lengthscale. By default, includes all chains; to plot the draws for only a subset of chains, past list of chain indices to ``chains``. Use to check for posterior multimodality.

    .. plot::

        from stratmc.data import load_trace
        from stratmc.plotting import lengthscale_traceplot

        full_trace = load_trace('examples/example_docs_trace')

        lengthscale_traceplot(full_trace, chains = [0, 1, 2, 3])

        plt.show()

    Parameters
    ----------
    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    chains: list(int) or numpy.array(int); optional
        Indices of chains to plot; optional (defaults to all chains).

    proxy: str, optional
        Target proxy; only required if more than one proxy was included in the inference.

    legend: bool, optional
        Generate a legend. Defaults to ``True``.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure with lengthscale trace plot.
    """


    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'proxy' in kwargs:
        proxy = kwargs['proxy']

    else:
        proxy = proxies[0]

    fig = plt.figure(figsize = figsize)
    ax = fig.gca()

    if chains is None:
        post_ls = full_trace.posterior['gp_ls_' +str(proxy)].values
        chains = np.arange(post_ls.shape[0])

    else:
        post_ls = full_trace.sel(chain = chains).posterior['gp_ls_' +str(proxy)].values

    sns.reset_defaults()

    for i in np.arange(post_ls.shape[0]):
        ax.plot(np.arange(post_ls.shape[1]),
                post_ls[i, :, :],
                lw = 0.5,
                label = 'Chain ' + str(chains[i]))

    ax.set_xlabel('Draw', fontsize = fs)
    ax.set_ylabel('Lengthscale (%s)' % proxy, fontsize = fs)

    if legend:
        ax.legend()

    ax.tick_params(labelsize = fs)

    fig.tight_layout()

    return fig



[docs]
def lengthscale_stability(full_trace, figsize = (5, 3.5), **kwargs):

    """
    Plot the posterior standard deviation of the :class:`pymc.gp.cov.ExpQuad <pymc.gp.cov.ExpQuad>` covariance kernel lengthscale when 1 through *N* chains are considered. When the posterior has been sufficiently explored, the standard deviation will stabilize; if it has not stabilized, then additional chains should be run.

    To consider chains from multiple traces associated with the same inference model, first combine the traces (saved as NetCDF files) using :py:meth:`combine_traces() <stratmc.data>` in :py:mod:`stratmc.data`.

    .. plot::

        from stratmc.data import load_trace
        from stratmc.plotting import lengthscale_stability

        full_trace = load_trace('examples/example_docs_trace')

        lengthscale_stability(full_trace, fontsize = 10)

        plt.show()

    Parameters
    ----------
    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    proxy: str, optional
        Target proxy; only required if more than one proxy was included in the inference.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure showing the standard deviation of the covariance kernel lengthscale hyperparameter posterior for 1 through *N* chains.
    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'proxy' in kwargs:
        proxy = kwargs['proxy']

    else:
        proxy = proxies[0]

    gp_ls_variance = calculate_lengthscale_stability(full_trace, proxy = proxy)

    fig = plt.figure(figsize = figsize)
    ax = fig.gca()

    ax.plot(np.arange(len(gp_ls_variance)) + 1, gp_ls_variance, color = 'k')

    ax.set_xlabel('Number of chains', fontsize = fs)
    ax.set_ylabel('Lengthscale standard deviation (%s)' % proxy, fontsize = fs)

    ax.tick_params(labelsize = fs)

    fig.tight_layout()

    return fig



[docs]
def proxy_signal_stability(full_trace, figsize = (5, 3.5), **kwargs):
    """
    Plot the sum (over all time slices) of the residuals between the median inferred proxy signal when all *N* chains are considered compared to when 1 to *N-1* chains are considered. When the posterior has been sufficiently explored, the residuals will stabilize and approach zero; if they have not stabilized, then additional chains should be run.

    To consider chains from multiple traces associated with the same inference model, first combine the traces (saved as NetCDF files) using :py:meth:`combine_traces() <stratmc.data>` in :py:mod:`stratmc.data`.

    .. plot::

        from stratmc.data import load_trace
        from stratmc.plotting import proxy_signal_stability

        full_trace = load_trace('examples/example_docs_trace')

        proxy_signal_stability(full_trace)

        plt.show()

    Parameters
    ----------
    full_trace: arviz.InferenceData
        An :class:`arviz.InferenceData` object containing the full set of prior and posterior samples from :py:meth:`get_trace() <stratmc.inference.get_trace>` in :py:mod:`stratmc.inference`.

    proxy: str, optional
        Target proxy; only required if more than one proxy was included in the inference.


    Returns
    -------
    fig: matplotlib.pyplot.figure
        Figure showing the stability of the proxy signal inference.
    """

    if 'fontsize' in kwargs:
        fs = kwargs['fontsize']
    else:
        fs = 12

    # get list of proxies included in model from full_trace
    variables = [
            l
            for l in list(full_trace["posterior"].data_vars.keys())
            if (f"{'gp_ls_'}" in l) and (f"{'unshifted'}" not in l)
            ]

    proxies = []
    for var in variables:
        proxies.append(var[6:])

    if 'proxy' in kwargs:
        proxy = kwargs['proxy']

    else:
        proxy = proxies[0]

    median_residuals = calculate_proxy_signal_stability(full_trace, proxy = proxy)

    fig = plt.figure(figsize = figsize)
    ax = fig.gca()

    ax.plot(np.arange(median_residuals.shape[0]) + 1, np.sum(median_residuals, axis = 1), color = 'k')

    ax.set_xlabel('Number of chains', fontsize = fs)
    ax.set_ylabel('Sum of median ' + str(proxy) + ' residuals', fontsize = fs)

    ax.tick_params(labelsize = fs)

    fig.tight_layout()

    return fig