Source code for virocon.plotting

"""
Functions to plot distributions and contours.
"""

import re
from functools import partial

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sts

from matplotlib.colors import LinearSegmentedColormap

from virocon.utils import calculate_design_conditions

__all__ = [
    "plot_marginal_quantiles",
    "plot_dependence_functions",
    "plot_histograms_of_interval_distributions",
    "plot_2D_isodensity",
    "plot_2D_contour",
]


# colors (schemes) chosen according to https://personal.sron.nl/~pault/


def _rainbow_PuRd():
    """
    Thanks to Paul Tol (https://personal.sron.nl/~pault/data/tol_colors.py)
    License:  Standard 3-clause BSD
    """
    clrs = [
        "#6F4C9B",
        "#6059A9",
        "#5568B8",
        "#4E79C5",
        "#4D8AC6",
        "#4E96BC",
        "#549EB3",
        "#59A5A9",
        "#60AB9E",
        "#69B190",
        "#77B77D",
        "#8CBC68",
        "#A6BE54",
        "#BEBC48",
        "#D1B541",
        "#DDAA3C",
        "#E49C39",
        "#E78C35",
        "#E67932",
        "#E4632D",
        "#DF4828",
        "#DA2222",
    ]
    cmap = LinearSegmentedColormap.from_list("rainbow_PuRd", clrs)
    cmap.set_bad("#FFFFFF")
    return cmap


# TODO move to utility as it is also used in contours.py
def get_default_semantics(n_dim):
    """
    Generate default semantics for n_dim dimensions.

    Parameters
    ----------
    n_dim : int
        Number of dimensions. Indicating the number of variables of the model.

    Returns
    -------
    semantics: dict
        Generated model description.

    """

    semantics = {
        "names": [f"Variable {dim + 1}" for dim in range(n_dim)],
        "symbols": [f"X_{dim + 1}" for dim in range(n_dim)],
        "units": ["arb. unit" for dim in range(n_dim)],
    }
    return semantics


def _get_n_axes(n_intervals):
    max_intervals = 16
    if n_intervals > max_intervals:
        raise NotImplementedError(
            f"Automatic axes creation is only supported for up to {max_intervals} intervals."
        )

    table = [
        (1, 1),
        (1, 2),
        (1, 3),
        (2, 2),
        (2, 3),
        (2, 3),
        (3, 3),
        (3, 3),
        (3, 3),
        (4, 4),
        (4, 4),
        (4, 4),
        (4, 4),
        (4, 4),
        (4, 4),
        (4, 4),
    ]

    fig, axes = plt.subplots(
        *table[n_intervals], sharex=True, sharey=True, squeeze=False
    )
    return fig, axes.ravel()


[docs]def plot_marginal_quantiles(model, sample, semantics=None, axes=None): """ Plot all marginal quantiles of a model. Plots the fitted marginal distribution versus a dataset in a quantile-quantile (QQ) plot. Parameters ---------- model : MultivariateModel The model used to plot the marginal quantiles. sample : ndarray of floats The environmental data sample that should be plotted against the fit. Shape: (sample_size, n_dim) semantics: dict, optional The description of the model. If None (the default) generic semantics will be used. The structure is as follows: modeldesc = { "names" : [<Names of variables>], "symbols" : [<Description of symbols>], "units" : [<Units of variables>] } axes: list, optional The matplotlib axes objects to plot into. One for each dimension. If None (the default) a new figure will be created for each dimension. Returns ------- The used matplotlib axes object. Notes ----- When saving the resulting axes in a vector image format (pdf, svg) the `sample` will still be rasterized to reduce the file size. To prevent that, iterate over the axes and use ``ax.get_lines()[0].set_rasterized(False)``. """ sample = np.asarray(sample) n_dim = model.n_dim if semantics is None: semantics = get_default_semantics(n_dim) if axes is None: axes = [] for i in range(n_dim): _, ax = plt.subplots() axes.append(ax) # probplot expects an object that has a ppf method, but we name it icdf # therefor we create a wrapper that maps the ppf to the icdf method class MarginalDistWrapper: def __init__(self, model, idx): self.model = model self.idx = idx def ppf(self, q): return self.model.marginal_icdf(q, self.idx) for dim in range(n_dim): dist_wrapper = MarginalDistWrapper(model, dim) ax = axes[dim] sts.probplot(sample[:, dim], dist=dist_wrapper, fit=False, plot=ax) ax.get_lines()[0].set_markerfacecolor("k") ax.get_lines()[0].set_markeredgecolor("k") ax.get_lines()[0].set_marker("x") ax.get_lines()[0].set_markersize(3) # Because a sample usually holds much more than 1000 observations, the # output shall be rasterized to reduce file size if the figure is saved # in a vector file format (svg, pdf). ax.get_lines()[0].set_rasterized(True) # prior to scipy version 1.7.0 a regression fit line was plotted, # even with option fit=False. As discussed in PR#149 we do not want # to keep this line. So here we remove it if it was plotted. if len(ax.lines) > 1: ax.lines[1].remove() # draw 45° line # adapted from statsmodels.graphics.gofplots.qqline # https://github.com/statsmodels/statsmodels/blob/161ca84b2b6e2b3ba5ef6b570d8c907c9b06f5de/statsmodels/graphics/gofplots.py#L892-L897 end_pts = list(zip(ax.get_xlim(), ax.get_ylim())) end_pts[0] = min(end_pts[0]) end_pts[1] = max(end_pts[1]) ax.plot(end_pts, end_pts, c="#BB5566") ax.set_xlim(end_pts) ax.set_ylim(end_pts) name_and_unit = f"{semantics['names'][dim].lower()} ({semantics['units'][dim]})" ax.set_xlabel(f"Theoretical quantiles of {name_and_unit}") ax.set_ylabel(f"Ordered values of {name_and_unit}") ax.title.set_text("") return axes
[docs]def plot_dependence_functions(model, semantics=None, par_rename={}, axes=None): """ Plot the fitted dependence functions of a model. Parameters ---------- model : MultivariateModel The model with the fitted dependence functions. semantics: dict, optional The description of the model. If None (the default) generic semantics will be used. par_rename : dict, optional A dictionary that maps from names of conditional parameters to a string. If e.g. the model has a distribution with a conditional parameter named 'mu' one could change that in the plot to '$mu$' with {'mu': '$mu$'}. axes: list of matplotlib axes objects, optional If not further specified, the number of axes are dependent on the number of dimensions of the model. Defaults to None. Returns ------- The used matplotlib axes object. """ n_dim = model.n_dim conditional_dist_idc = [ dim for dim in range(n_dim) if model.conditional_on[dim] is not None ] if semantics is None: semantics = get_default_semantics(n_dim) if axes is None: n_axes = 0 for dim in conditional_dist_idc: n_axes += len(model.distributions[dim].conditional_parameters) axes = [] for i in range(n_axes): _, ax = plt.subplots() axes.append(ax) axes_counter = 0 for dim in conditional_dist_idc: dist = model.distributions[dim] conditioning_values = dist.conditioning_values if conditioning_values is not None: x = np.linspace(0, max(conditioning_values)) else: x = np.linspace(0, 10) cond_idx = model.conditional_on[dim] x_name = semantics["names"][cond_idx] x_symbol = semantics["symbols"][cond_idx] x_unit = semantics["units"][cond_idx] x_label = f"{x_name}," + r" $\it{" + f"{x_symbol}" + r"}$" + f" ({x_unit})" for par_name, dep_func in dist.conditional_parameters.items(): ax = axes[axes_counter] axes_counter += 1 # If a model is created directly (without fitting it to a dataset), it does # not have conditioning values. if conditioning_values is not None: par_values = [par[par_name] for par in dist.parameters_per_interval] ax.scatter( conditioning_values, par_values, c="k", marker="x", label="estimates from intervals", ) if dep_func.latex is not None: dep_func_label = dep_func.latex # Replace 'x' with variable symbol (e.g. 'h_s') splitted_symbol = x_symbol.split("_") if len(splitted_symbol) == 1: # If there was no underscore. var_symbol = splitted_symbol[0].lower() else: # If there was one or many underscores. var_symbol = ( splitted_symbol[0].lower() + "_" + "_".join(splitted_symbol[1:]) ) # Replace 'x' if it is not part of '\exp' which is checked by checking whether # 'x' follows '\e'. dep_func_label = re.sub( r"(?<!\\e)x", "{" + var_symbol + "}", dep_func_label ) # Replace symbols of parameters (a, b, ..) with estimated values. for par_name_local, par_value_local in dep_func.parameters.items(): dep_func_label = dep_func_label.replace( par_name_local, "{" + "{:.2f}".format(par_value_local) + "}" ) else: if not isinstance(dep_func.func, partial): dep_func_label = "Dependence function: " + dep_func.func.__name__ else: dep_func_label = ( "Dependence function: " + dep_func.func.func.__name__ ) ax.plot(x, dep_func(x), c="#004488", label=dep_func_label) ax.set_xlabel(x_label) if par_name in par_rename: y_label = par_rename[par_name] else: y_label = par_name ax.set_ylabel(y_label) ax.legend() return axes
[docs]def plot_histograms_of_interval_distributions( model, sample, semantics=None, plot_pdf=True ): """ Plot histograms of all model dimensions. If the model is conditional in a dimension all histograms per interval are plotted for that dimension. In such a case the fitted interval distributions are generally different from the final joint distribution. Parameters ---------- model : MultivariateModel The model that was fitted to the dataset. sample : ndarray The data that was used to fit the model. semantics: dict, optional The description of the model. If None (the default) generic semantics will be used. plot_pdf: boolean, optional Whether the fitted probability density should be plotted. Defaults to True. Returns ------- The used matplotlib axes objects. """ sample = np.asarray(sample) n_dim = model.n_dim if semantics is None: semantics = get_default_semantics(n_dim) figures = [] axes_list = [] for dim in range(n_dim): x_name = semantics["names"][dim] x_symbol = semantics["symbols"][dim] x_unit = semantics["units"][dim] x_label = f"{x_name}," + r" $\it{" + f"{x_symbol}" + r"}$" + f" ({x_unit})" if model.conditional_on[dim] is None: # unconditional data = sample[:, dim] dist = model.distributions[dim] x = np.linspace(np.min(data), np.max(data)) fig, ax = plt.subplots() ax.hist( data, bins="doane", density=True, color="#000000", histtype="stepfilled", alpha=0.2, ) if plot_pdf: density = dist.pdf(x) ax.plot( x, density, color="#004488", ) ax.set_xlabel(x_label) ax.set_ylabel("Probability density") ax.set_title(f"n={len(data)}") figures.append(fig) axes_list.append(ax) else: # conditional cond_dist = model.distributions[dim] dist_per_interval = cond_dist.distributions_per_interval n_intervals = len(dist_per_interval) conditioning_idx = model.conditional_on[dim] conditioning_symbol = semantics["symbols"][conditioning_idx] conditioning_unit = semantics["units"][conditioning_idx] slicer = model.interval_slicers[conditioning_idx] interval_slices, conditioning_values, _ = slicer.slice_( sample[:, conditioning_idx] ) data_intervals = [sample[int_slice, dim] for int_slice in interval_slices] # if the following fails, the sample was probably not used for fitting # TODO raise proper exception then np.testing.assert_allclose( conditioning_values, cond_dist.conditioning_values ) assert len(data_intervals) == len(cond_dist.data_intervals) for i in range(len(data_intervals)): np.testing.assert_allclose( np.sort(data_intervals[i]), np.sort(cond_dist.data_intervals[i]) ) fig, axes = _get_n_axes(n_intervals) for interval_idx in range(n_intervals): cond_val = conditioning_values[interval_idx] data = data_intervals[interval_idx] x = np.linspace(np.min(data), np.max(data)) dist = dist_per_interval[interval_idx] ax = axes[interval_idx] ax.hist( data, bins="doane", density=True, color="#000000", histtype="stepfilled", alpha=0.2, ) if plot_pdf: density = dist.pdf(x) ax.plot( x, density, color="#004488", ) ax.set_xlabel(x_label) ax.set_ylabel("Probability density") title = f"{conditioning_symbol} = {cond_val:.3f} {conditioning_unit}, n={len(data)}" ax.set_title(title) figures.append(fig) axes_list.append(axes) return figures, axes_list
[docs]def plot_2D_isodensity( model, sample, semantics=None, swap_axis=False, limits=None, levels=None, ax=None, n_grid_steps=250, ): """ Plot isodensity contours and a data sample for a 2D model. Parameters ---------- model : MultivariateModel The 2D model to use. sample : ndarray of floats The 2D data sample that should be plotted. semantics: dict, optional The description of the model. If None (the default) generic semantics will be used. swap_axis : boolean, optional If True the second dimension of the model is plotted on the x-axis and the first on the y-axis. Otherwise, vice-versa. Defaults to False. limits : list of tuples, optional Specifies in which rectangular region the density is calculated and plotted. If None (default) limits will be set automatically. Example: [(0, 20), (0, 12)] levels : list of floats, optional The probability density levels that are plotted. If None (default) levels are set automatically. Example: [0.001, 0.01, 0.1] n_grid_steps : int, optional The number of steps along each axis of the grid used to plot the contours. Defaults to 250. ax : matplotlib Axes, optional Matplotlib axes object to use for plotting. If None (default) a new figure will be created. Returns ------- The used matplotlib axes object. """ n_dim = model.n_dim assert n_dim == 2 if swap_axis: x_idx = 1 y_idx = 0 else: x_idx = 0 y_idx = 1 if semantics is None: semantics = get_default_semantics(n_dim) if ax is None: _, ax = plt.subplots() sample = np.asarray(sample) ax.scatter( sample[:, x_idx], sample[:, y_idx], c="k", marker=".", alpha=0.3, rasterized=True, ) if limits is not None: x_lower = limits[0][0] x_upper = limits[0][1] y_lower = limits[1][0] y_upper = limits[1][1] else: x_range = max(sample[:, 0]) - min((sample[:, 0])) expand_factor = 0.05 x_lower = min(sample[:, 0]) - expand_factor * x_range x_upper = max(sample[:, 0]) + expand_factor * x_range y_range = max(sample[:, 1]) - min((sample[:, 1])) y_lower = min(sample[:, 1]) - expand_factor * y_range y_upper = max(sample[:, 1]) + expand_factor * y_range x, y = np.linspace((x_lower, y_lower), (x_upper, y_upper), num=n_grid_steps).T X, Y = np.meshgrid(x, y) grid_flat = np.c_[X.ravel(), Y.ravel()] f = model.pdf(grid_flat) Z = f.reshape(X.shape) if swap_axis: tmp = X X = Y Y = tmp if levels is not None: lvl_labels = ["{:.1E}".format(x) for x in levels] n_levels = len(levels) else: # Define the lowest isodensity level based on the density values on the evaluated grid. q = np.quantile(f, q=0.5) if q > 0: min_lvl = int(f"{q:.0e}".split("e")[1]) else: min_lvl = -5 n_levels = np.abs(min_lvl) levels = np.logspace(-1, min_lvl, num=n_levels)[::-1] lvl_labels = [f"1E{int(i)}" for i in np.linspace(-1, min_lvl, num=n_levels)][ ::-1 ] cmap = _rainbow_PuRd() colors = cmap(np.linspace(0, 1, num=n_levels)) CS = ax.contour(X, Y, Z, levels=levels, colors=colors) proxies = [plt.Line2D([], [], color=pc.get_edgecolor()[0]) for pc in CS.collections] ax.legend( proxies, lvl_labels, loc="upper left", ncol=1, frameon=False, title="Probability density", ) x_name = semantics["names"][x_idx] x_symbol = semantics["symbols"][x_idx] x_unit = semantics["units"][x_idx] x_label = f"{x_name}," + r" $\it{" + f"{x_symbol}" + r"}$" + f" ({x_unit})" y_name = semantics["names"][y_idx] y_symbol = semantics["symbols"][y_idx] y_unit = semantics["units"][y_idx] y_label = f"{y_name}," + r" $\it{" + f"{y_symbol}" + r"}$" + f" ({y_unit})" ax.set_xlabel(x_label) ax.set_ylabel(y_label) return ax
[docs]def plot_2D_contour( contour, sample=None, design_conditions=None, semantics=None, swap_axis=False, ax=None, ): """ Plot a 2D contour. Parameters ---------- contour: Contour The 2D contour that should be plotted. sample : ndarray of floats, optional A 2D data sample that should be plotted along the contour. Shape: (number of realizations, 2) design_conditions : array-like or boolean, optional Specified environmental conditions under which the system must operate. If an array it is assumed to be a 2D array of shape (number of points, 2) and should contain the precalculated design conditions. If it is True design_conditions are computed with default values and plotted. Otherwise, no design conditions will be plotted (the default). semantics: dict, optional Generated model description. Defaults to None. swap_axis : boolean, optional f True the second dimension of the model is plotted on the x-axis and the first on the y-axis. Otherwise, vice-versa. Defaults to False. ax : matplotlib Axes, optional Matplotlib axes object to use for plotting. If None (default) a new figure will be created. Returns ------- The matplotlib axes objects plotted into. (optional: the design_conditions if not None, yet to implement) """ # design conditions can be True or array n_dim = 2 if swap_axis: x_idx = 1 y_idx = 0 else: x_idx = 0 y_idx = 1 if semantics is None: semantics = get_default_semantics(n_dim) if ax is None: _, ax = plt.subplots() if design_conditions: try: # if iterable assume it's already the design conditions iter(design_conditions) except ( TypeError ): # if it is not an array we compute the default design_conditions design_conditions = calculate_design_conditions( contour, swap_axis=swap_axis ) ax.scatter( design_conditions[:, 0], design_conditions[:, 1], c="#DDAA33", marker="x", zorder=2.5, ) if sample is not None: sample = np.asarray(sample) ax.scatter( sample[:, x_idx], sample[:, y_idx], c="k", marker=".", alpha=0.3, rasterized=True, ) coords = contour.coordinates x = coords[:, x_idx].tolist() x.append(x[0]) y = coords[:, y_idx].tolist() y.append(y[0]) # It was thought that this line caused a DepreciationWarning, but the change # was reverted as we were not sure about the reason. # https://github.com/virocon-organization/virocon/commit/45482e0b5ff2d21c594f0e292b3db9c971881b5c # https://github.com/virocon-organization/virocon/pull/124#discussion_r684193507 ax.plot(x, y, c="#BB5566") # ax.plot(np.asarray(x, dtype=object), np.asarray(y, dtype=object), c="#BB5566") x_name = semantics["names"][x_idx] x_symbol = semantics["symbols"][x_idx] x_unit = semantics["units"][x_idx] x_label = f"{x_name}," + r" $\it{" + f"{x_symbol}" + r"}$" + f" ({x_unit})" y_name = semantics["names"][y_idx] y_symbol = semantics["symbols"][y_idx] y_unit = semantics["units"][y_idx] y_label = f"{y_name}," + r" $\it{" + f"{y_symbol}" + r"}$" + f" ({y_unit})" ax.set_xlabel(x_label) ax.set_ylabel(y_label) if design_conditions is None: return ax else: return ax, design_conditions