Source code for virocon.plotting

"""
Functions to plot distributions and contours.
"""

import re
from functools import partial

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sts

from matplotlib.colors import LinearSegmentedColormap

from virocon.utils import calculate_design_conditions

__all__ = [
    "plot_marginal_quantiles",
    "plot_dependence_functions",
    "plot_histograms_of_interval_distributions",
    "plot_2D_isodensity",
    "plot_2D_contour",
]


# colors (schemes) chosen according to https://personal.sron.nl/~pault/


def _rainbow_PuRd():
    """
    Thanks to Paul Tol (https://personal.sron.nl/~pault/data/tol_colors.py)
    License:  Standard 3-clause BSD
    """
    clrs = [
        "#6F4C9B",
        "#6059A9",
        "#5568B8",
        "#4E79C5",
        "#4D8AC6",
        "#4E96BC",
        "#549EB3",
        "#59A5A9",
        "#60AB9E",
        "#69B190",
        "#77B77D",
        "#8CBC68",
        "#A6BE54",
        "#BEBC48",
        "#D1B541",
        "#DDAA3C",
        "#E49C39",
        "#E78C35",
        "#E67932",
        "#E4632D",
        "#DF4828",
        "#DA2222",
    ]
    cmap = LinearSegmentedColormap.from_list("rainbow_PuRd", clrs)
    cmap.set_bad("#FFFFFF")
    return cmap


# TODO move to utility as it is also used in contours.py
def get_default_semantics(n_dim):
    """
    Generate default semantics for n_dim dimensions.

    Parameters
    ----------
    n_dim : int
        Number of dimensions. Indicating the number of variables of the model.

    Returns
    -------
    semantics: dict
        Generated model description.

    """

    semantics = {
        "names": [f"Variable {dim + 1}" for dim in range(n_dim)],
        "symbols": [f"X_{dim + 1}" for dim in range(n_dim)],
        "units": ["arb. unit" for dim in range(n_dim)],
    }
    return semantics


def _get_n_axes(n_intervals, max_cols=4):
    if n_intervals <= 0:
        raise ValueError(
            f"n_intervals should be a positive integer, but got {n_intervals}."
        )
    elif n_intervals < max_cols**2:
        table = np.array(
            [
                (rows, cols)
                for cols in range(1, max_cols + 1)
                for rows in range(1, cols + 1)
            ]
        )
        table = table[np.prod(table, axis=1) >= n_intervals]
        (nrows, ncols) = table[np.argmin(np.prod(table, axis=1) - n_intervals)]
    else:
        nrows = np.ceil(n_intervals / max_cols).astype(int)
        ncols = max_cols

    fig, axes = plt.subplots(
        nrows=nrows, ncols=ncols, sharex=True, sharey=True, squeeze=False
    )
    return fig, axes.ravel()



[docs]
def plot_marginal_quantiles(model, sample, semantics=None, axes=None):
    """
    Plot all marginal quantiles of a model.

    Plots the fitted marginal distribution versus a dataset in a quantile-quantile (QQ) plot.

    Parameters
    ----------
    model :  MultivariateModel
        The model used to plot the marginal quantiles.
    sample : ndarray of floats
        The environmental data sample that should be plotted against the fit.
        Shape: (sample_size, n_dim)
    semantics: dict, optional
        The description of the model. If None (the default) generic semantics
        will be used. The structure is as follows:
        modeldesc = {
        "names" : [<Names of variables>],
        "symbols" : [<Description of symbols>],
        "units" : [<Units of variables>] }
    axes: list, optional
        The matplotlib axes objects to plot into. One for each dimension. If
        None (the default) a new figure will be created for each dimension.

    Returns
    -------
    The used matplotlib axes object.

    Notes
    -----
    When saving the resulting axes in a vector image format (pdf, svg) the
    `sample` will still be rasterized to reduce the file size.
    To prevent that, iterate over the axes and use
    ``ax.get_lines()[0].set_rasterized(False)``.

    """

    sample = np.asarray(sample)
    n_dim = model.n_dim
    if semantics is None:
        semantics = get_default_semantics(n_dim)

    if axes is None:
        axes = []
        for i in range(n_dim):
            _, ax = plt.subplots()
            axes.append(ax)

    # probplot expects an object that has a ppf method, but we name it icdf
    # therefor we create a wrapper that maps the ppf to the icdf method
    class MarginalDistWrapper:
        def __init__(self, model, idx):
            self.model = model
            self.idx = idx

        def ppf(self, q):
            return self.model.marginal_icdf(q, self.idx)

    for dim in range(n_dim):
        dist_wrapper = MarginalDistWrapper(model, dim)
        ax = axes[dim]

        sts.probplot(sample[:, dim], dist=dist_wrapper, fit=False, plot=ax)
        ax.get_lines()[0].set_markerfacecolor("k")
        ax.get_lines()[0].set_markeredgecolor("k")
        ax.get_lines()[0].set_marker("x")
        ax.get_lines()[0].set_markersize(3)

        # Because a sample usually holds much more than 1000 observations, the
        # output shall be rasterized to reduce file size if the figure is saved
        # in a vector file format (svg, pdf).
        ax.get_lines()[0].set_rasterized(True)

        # prior to scipy version 1.7.0 a regression fit line was plotted,
        # even with option fit=False. As discussed in PR#149 we do not want
        # to keep this line. So here we remove it if it was plotted.
        if len(ax.lines) > 1:
            ax.lines[1].remove()

        # draw 45° line
        # adapted from statsmodels.graphics.gofplots.qqline
        # https://github.com/statsmodels/statsmodels/blob/161ca84b2b6e2b3ba5ef6b570d8c907c9b06f5de/statsmodels/graphics/gofplots.py#L892-L897
        end_pts = list(zip(ax.get_xlim(), ax.get_ylim()))
        end_pts[0] = min(end_pts[0])
        end_pts[1] = max(end_pts[1])
        ax.plot(end_pts, end_pts, c="#BB5566")
        ax.set_xlim(end_pts)
        ax.set_ylim(end_pts)

        name_and_unit = f"{semantics['names'][dim].lower()} ({semantics['units'][dim]})"
        ax.set_xlabel(f"Theoretical quantiles of {name_and_unit}")
        ax.set_ylabel(f"Ordered values of {name_and_unit}")
        ax.title.set_text("")

    return axes




[docs]
def plot_dependence_functions(model, semantics=None, par_rename={}, axes=None):
    """
    Plot the fitted dependence functions of a model.

    Parameters
    ----------
    model :  MultivariateModel
        The model with the fitted dependence functions.
    semantics: dict, optional
        The description of the model. If None (the default) generic semantics will be used.
    par_rename : dict, optional
        A dictionary that maps from names of conditional parameters to a
        string. If e.g. the model has a distribution with a conditional
        parameter named 'mu' one could change that in the plot to '$mu$' with
        {'mu': '$mu$'}.
    axes: list of matplotlib axes objects, optional
        If not further specified, the number of axes are dependent on the number
        of dimensions of the model. Defaults to None.

    Returns
    -------
    The used matplotlib axes object.

    """

    n_dim = model.n_dim
    conditional_dist_idc = [
        dim for dim in range(n_dim) if model.conditional_on[dim] is not None
    ]

    if semantics is None:
        semantics = get_default_semantics(n_dim)

    if axes is None:
        n_axes = 0
        for dim in conditional_dist_idc:
            n_axes += len(model.distributions[dim].conditional_parameters)
        axes = []
        for i in range(n_axes):
            _, ax = plt.subplots()
            axes.append(ax)

    axes_counter = 0
    for dim in conditional_dist_idc:
        dist = model.distributions[dim]
        conditioning_values = dist.conditioning_values
        if conditioning_values is not None:
            x = np.linspace(0, max(conditioning_values))
        else:
            x = np.linspace(0, 10)
        cond_idx = model.conditional_on[dim]
        x_name = semantics["names"][cond_idx]
        x_symbol = semantics["symbols"][cond_idx]
        x_unit = semantics["units"][cond_idx]
        x_label = f"{x_name}," + r" $\it{" + f"{x_symbol}" + r"}$" + f" ({x_unit})"
        for par_name, dep_func in dist.conditional_parameters.items():
            ax = axes[axes_counter]
            axes_counter += 1
            # If a model is created directly (without fitting it to a dataset), it does
            # not have conditioning values.
            if conditioning_values is not None:
                par_values = [par[par_name] for par in dist.parameters_per_interval]
                ax.scatter(
                    conditioning_values,
                    par_values,
                    c="k",
                    marker="x",
                    label="estimates from intervals",
                )
            if dep_func.latex is not None:
                dep_func_label = dep_func.latex

                # Replace 'x' with variable symbol (e.g. 'h_s')
                splitted_symbol = x_symbol.split("_")
                if len(splitted_symbol) == 1:  # If there was no underscore.
                    var_symbol = splitted_symbol[0].lower()
                else:  # If there was one or many underscores.
                    var_symbol = (
                        splitted_symbol[0].lower() + "_" + "_".join(splitted_symbol[1:])
                    )

                # Replace 'x' if it is not part of '\exp' which is checked by checking whether
                # 'x' follows '\e'.
                dep_func_label = re.sub(
                    r"(?<!\\e)x", "{" + var_symbol + "}", dep_func_label
                )

                # Replace symbols of parameters (a, b, ..) with estimated values.
                for par_name_local, par_value_local in dep_func.parameters.items():
                    dep_func_label = dep_func_label.replace(
                        par_name_local, "{" + "{:.2f}".format(par_value_local) + "}"
                    )
            else:
                if not isinstance(dep_func.func, partial):
                    dep_func_label = "Dependence function: " + dep_func.func.__name__
                else:
                    dep_func_label = (
                        "Dependence function: " + dep_func.func.func.__name__
                    )
            ax.plot(x, dep_func(x), c="#004488", label=dep_func_label)
            ax.set_xlabel(x_label)
            if par_name in par_rename:
                y_label = par_rename[par_name]
            else:
                y_label = par_name
            ax.set_ylabel(y_label)
            ax.legend()

    return axes




[docs]
def plot_histograms_of_interval_distributions(
    model,
    sample,
    semantics=None,
    plot_pdf=True,
    max_cols=4,
):
    """
    Plot histograms of all model dimensions.

    If the model is conditional in a dimension all histograms per interval are plotted for that dimension.
    In such a case the fitted interval distributions are generally different from the final joint distribution.

    Parameters
    ----------
    model : MultivariateModel
        The model that was fitted to the dataset.
    sample : ndarray
        The data that was used to fit the model.
    semantics: dict, optional
        The description of the model. If None (the default) generic semantics will be used.
    plot_pdf: boolean, optional
        Whether the fitted probability density should be plotted. Defaults to True.
    max_cols : int, default 4
        Maximum number of columns (subfigures horizontally).

    Returns
    -------
    The used matplotlib axes objects.

    """
    sample = np.asarray(sample)
    n_dim = model.n_dim

    if semantics is None:
        semantics = get_default_semantics(n_dim)

    figures = []
    axes_list = []

    for dim in range(n_dim):
        x_name = semantics["names"][dim]
        x_symbol = semantics["symbols"][dim]
        x_unit = semantics["units"][dim]
        x_label = f"{x_name}," + r" $\it{" + f"{x_symbol}" + r"}$" + f" ({x_unit})"

        if model.conditional_on[dim] is None:
            # unconditional
            data = sample[:, dim]
            dist = model.distributions[dim]
            x = np.linspace(np.min(data), np.max(data))
            fig, ax = plt.subplots()
            ax.hist(
                data,
                bins="doane",
                density=True,
                color="#000000",
                histtype="stepfilled",
                alpha=0.2,
            )
            if plot_pdf:
                density = dist.pdf(x)
                ax.plot(
                    x,
                    density,
                    color="#004488",
                )
            ax.set_xlabel(x_label)
            ax.set_ylabel("Probability density")
            ax.set_title(f"n={len(data)}")

            figures.append(fig)
            axes_list.append(ax)

        else:
            # conditional
            cond_dist = model.distributions[dim]
            dist_per_interval = cond_dist.distributions_per_interval
            n_intervals = len(dist_per_interval)

            conditioning_idx = model.conditional_on[dim]
            conditioning_symbol = semantics["symbols"][conditioning_idx]
            conditioning_unit = semantics["units"][conditioning_idx]

            slicer = model.interval_slicers[conditioning_idx]
            interval_slices, conditioning_values, _ = slicer.slice_(
                sample[:, conditioning_idx]
            )
            data_intervals = [sample[int_slice, dim] for int_slice in interval_slices]

            # if the following fails, the sample was probably not used for fitting
            # TODO raise proper exception then
            np.testing.assert_allclose(
                conditioning_values, cond_dist.conditioning_values
            )
            assert len(data_intervals) == len(cond_dist.data_intervals)
            for i in range(len(data_intervals)):
                np.testing.assert_allclose(
                    np.sort(data_intervals[i]), np.sort(cond_dist.data_intervals[i])
                )

            fig, axes = _get_n_axes(n_intervals, max_cols=max_cols)
            for interval_idx in range(n_intervals):
                cond_val = conditioning_values[interval_idx]
                data = data_intervals[interval_idx]
                x = np.linspace(np.min(data), np.max(data))
                dist = dist_per_interval[interval_idx]
                ax = axes[interval_idx]
                ax.hist(
                    data,
                    bins="doane",
                    density=True,
                    color="#000000",
                    histtype="stepfilled",
                    alpha=0.2,
                )
                if plot_pdf:
                    density = dist.pdf(x)
                    ax.plot(
                        x,
                        density,
                        color="#004488",
                    )
                ax.set_xlabel(x_label)
                ax.set_ylabel("Probability density")
                title = f"{conditioning_symbol} = {cond_val:.3f} {conditioning_unit}, n={len(data)}"
                ax.set_title(title)

            figures.append(fig)
            axes_list.append(axes)

    return figures, axes_list




[docs]
def plot_2D_isodensity(
    model,
    sample,
    semantics=None,
    swap_axis=False,
    limits=None,
    levels=None,
    ax=None,
    n_grid_steps=250,
    cmap=None,
    contour_kwargs=None,
):
    """
    Plot isodensity contours and a data sample for a 2D model.

    Parameters
    ----------
    model :  MultivariateModel
        The 2D model to use.
    sample : ndarray of floats
        The 2D data sample that should be plotted.
    semantics: dict, optional
        The description of the model. If None (the default) generic semantics
        will be used.
    swap_axis : boolean, optional
        If True the second dimension of the model is plotted on the x-axis and
        the first on the y-axis. Otherwise, vice-versa. Defaults to False.
    limits : list of tuples, optional
        Specifies in which rectangular region the density is calculated and
        plotted. If None (default) limits will be set automatically.
        Example: [(0, 20), (0, 12)]
    levels : list of floats, optional
        The probability density levels that are plotted. If None (default)
        levels are set automatically.
        Example: [0.001, 0.01, 0.1]
    n_grid_steps : int, optional
        The number of steps along each axis of the grid used to plot the contours.
        Defaults to 250.
    ax : matplotlib Axes, optional
        Matplotlib axes object to use for plotting. If None (default) a new
        figure will be created.
    cmap : matplotlib colormap, optional
        The colormap to use for the isodensity contours.
    contour_kwargs : dict
        Any other keyword arguments for matplotlib.pyplot.contour().
        Example: {"linewidths": [1,2,3], "linestyles": ["solid","dotted","dashed"]}

    Returns
    -------
    The used matplotlib axes object.

    """

    n_dim = model.n_dim
    assert n_dim == 2

    if swap_axis:
        x_idx = 1
        y_idx = 0
    else:
        x_idx = 0
        y_idx = 1

    if semantics is None:
        semantics = get_default_semantics(n_dim)

    if ax is None:
        _, ax = plt.subplots()

    sample = np.asarray(sample)
    ax.scatter(
        sample[:, x_idx],
        sample[:, y_idx],
        c="k",
        marker=".",
        alpha=0.3,
        rasterized=True,
    )

    if limits is not None:
        x_lower = limits[0][0]
        x_upper = limits[0][1]
        y_lower = limits[1][0]
        y_upper = limits[1][1]
    else:
        x_range = max(sample[:, 0]) - min((sample[:, 0]))
        expand_factor = 0.05
        x_lower = min(sample[:, 0]) - expand_factor * x_range
        x_upper = max(sample[:, 0]) + expand_factor * x_range
        y_range = max(sample[:, 1]) - min((sample[:, 1]))
        y_lower = min(sample[:, 1]) - expand_factor * y_range
        y_upper = max(sample[:, 1]) + expand_factor * y_range

    x, y = np.linspace((x_lower, y_lower), (x_upper, y_upper), num=n_grid_steps).T
    X, Y = np.meshgrid(x, y)
    grid_flat = np.c_[X.ravel(), Y.ravel()]
    f = model.pdf(grid_flat)
    Z = f.reshape(X.shape)

    if swap_axis:
        tmp = X
        X = Y
        Y = tmp

    if levels is not None:
        lvl_labels = ["{:.1E}".format(x) for x in levels]
        n_levels = len(levels)
    else:
        # Define the lowest isodensity level based on the density values on the evaluated grid.
        q = np.quantile(f, q=0.5)
        if q > 0:
            min_lvl = int(f"{q:.0e}".split("e")[1])
        else:
            min_lvl = -5
        n_levels = np.abs(min_lvl)
        levels = np.logspace(-1, min_lvl, num=n_levels)[::-1]
        lvl_labels = [f"1E{int(i)}" for i in np.linspace(-1, min_lvl, num=n_levels)][
            ::-1
        ]

    if cmap is None:
        cmap = _rainbow_PuRd()
    elif isinstance(cmap, str):
        cmap = plt.get_cmap(cmap)
    colors = cmap(np.linspace(0, 1, num=n_levels))
    contour_kwargs = {} if contour_kwargs is None else contour_kwargs
    CS = ax.contour(X, Y, Z, levels=levels, colors=colors, **contour_kwargs)
    handles, _ = CS.legend_elements()
    ax.legend(
        handles=handles,
        labels=lvl_labels,
        loc="upper left",
        ncol=1,
        frameon=False,
        title="Probability density",
    )
    x_name = semantics["names"][x_idx]
    x_symbol = semantics["symbols"][x_idx]
    x_unit = semantics["units"][x_idx]
    x_label = f"{x_name}," + r" $\it{" + f"{x_symbol}" + r"}$" + f" ({x_unit})"
    y_name = semantics["names"][y_idx]
    y_symbol = semantics["symbols"][y_idx]
    y_unit = semantics["units"][y_idx]
    y_label = f"{y_name}," + r" $\it{" + f"{y_symbol}" + r"}$" + f" ({y_unit})"
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    return ax




[docs]
def plot_2D_contour(
    contour,
    sample=None,
    design_conditions=None,
    semantics=None,
    swap_axis=False,
    ax=None,
    plot_kwargs=None,
):
    """
    Plot a 2D contour.

    Parameters
    ----------
    contour: Contour
        The 2D contour that should be plotted.
    sample : ndarray of floats, optional
        A 2D data sample that should be plotted along the contour.
        Shape: (number of realizations, 2)
    design_conditions : array-like or boolean, optional
       Specified environmental conditions under which the system must operate.
       If an array it is assumed to be a 2D array of shape
       (number of points, 2) and should contain the precalculated design
       conditions. If it is True design_conditions are computed with default
       values and plotted. Otherwise, no design conditions will be plotted
       (the default).
    semantics: dict, optional
        Generated model description. Defaults to None.
    swap_axis : boolean, optional
        f True the second dimension of the model is plotted on the x-axis and
        the first on the y-axis. Otherwise, vice-versa. Defaults to False.
    ax : matplotlib Axes, optional
        Matplotlib axes object to use for plotting. If None (default) a new
        figure will be created.
    plot_kwargs : dict
        Any other keyword arguments to pass to the matplotlib plotting function,
        Example: {"color": "blue", "linewidth": 2, "label": "100-year contour"

    Returns
    -------
    The matplotlib axes objects plotted into.
    (optional: the design_conditions if not None, yet to implement)

    """

    # design conditions can be True or array
    n_dim = 2
    if swap_axis:
        x_idx = 1
        y_idx = 0
    else:
        x_idx = 0
        y_idx = 1

    if semantics is None:
        semantics = get_default_semantics(n_dim)

    if ax is None:
        _, ax = plt.subplots()

    if design_conditions:
        try:  # if iterable assume it's already the design conditions
            iter(design_conditions)
        except (
            TypeError
        ):  # if it is not an array we compute the default design_conditions
            design_conditions = calculate_design_conditions(
                contour, swap_axis=swap_axis
            )

        ax.scatter(
            design_conditions[:, 0],
            design_conditions[:, 1],
            c="#DDAA33",
            marker="x",
            zorder=2.5,
        )

    if sample is not None:
        sample = np.asarray(sample)
        ax.scatter(
            sample[:, x_idx],
            sample[:, y_idx],
            c="k",
            marker=".",
            alpha=0.3,
            rasterized=True,
        )

    coords = contour.coordinates
    x = coords[:, x_idx].tolist()
    x.append(x[0])
    y = coords[:, y_idx].tolist()
    y.append(y[0])

    # It was thought that this line caused a DepreciationWarning, but the change
    # was reverted as we were not sure about the reason.
    # https://github.com/virocon-organization/virocon/commit/45482e0b5ff2d21c594f0e292b3db9c971881b5c
    # https://github.com/virocon-organization/virocon/pull/124#discussion_r684193507
    plot_kwargs = {} if plot_kwargs is None else plot_kwargs
    if "color" not in plot_kwargs and "c" not in plot_kwargs:
        plot_kwargs["c"] = "#BB5566"
    ax.plot(x, y, **plot_kwargs)
    # ax.plot(np.asarray(x, dtype=object), np.asarray(y, dtype=object), c="#BB5566")

    x_name = semantics["names"][x_idx]
    x_symbol = semantics["symbols"][x_idx]
    x_unit = semantics["units"][x_idx]
    x_label = f"{x_name}," + r" $\it{" + f"{x_symbol}" + r"}$" + f" ({x_unit})"
    y_name = semantics["names"][y_idx]
    y_symbol = semantics["symbols"][y_idx]
    y_unit = semantics["units"][y_idx]
    y_label = f"{y_name}," + r" $\it{" + f"{y_symbol}" + r"}$" + f" ({y_unit})"
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    if design_conditions is None:
        return ax
    else:
        return ax, design_conditions
Navigation

Source code for virocon.plotting

Navigation