Source code for moptipy.evaluation.plot_ecdf

"""
Plot a set of ECDF or ERT-ECDF objects into one figure.

The empirical cumulative distribution function (ECDF, see
:mod:`~moptipy.evaluation.ecdf`) is a function that shows the fraction of runs
that were successful in attaining a certain goal objective value over the
time. The combination of ERT and ECDF is discussed in
:mod:`~moptipy.evaluation.ertecdf`.

1. Nikolaus Hansen, Anne Auger, Steffen Finck, Raymond Ros. *Real-Parameter
   Black-Box Optimization Benchmarking 2010: Experimental Setup.*
   Research Report RR-7215, INRIA. 2010. inria-00462481.
   https://hal.inria.fr/inria-00462481/document/
2. Dave Andrew Douglas Tompkins and Holger H. Hoos. UBCSAT: An Implementation
   and Experimentation Environment for SLS Algorithms for SAT and MAX-SAT. In
   *Revised Selected Papers from the Seventh International Conference on
   Theory and Applications of Satisfiability Testing (SAT'04),* May 10-13,
   2004, Vancouver, BC, Canada, pages 306-320. Lecture Notes in Computer
   Science (LNCS), volume 3542. Berlin, Germany: Springer-Verlag GmbH.
   ISBN: 3-540-27829-X. doi: https://doi.org/10.1007/11527695_24.
3. Holger H. Hoos and Thomas Stützle. Evaluating Las Vegas Algorithms -
   Pitfalls and Remedies. In Gregory F. Cooper and Serafín Moral, editors,
   *Proceedings of the 14th Conference on Uncertainty in Artificial
   Intelligence (UAI'98)*, July 24-26, 1998, Madison, WI, USA, pages 238-245.
   San Francisco, CA, USA: Morgan Kaufmann Publishers Inc.
   ISBN: 1-55860-555-X.
"""
from math import inf, isfinite
from typing import Any, Callable, Final, Iterable, cast

import numpy as np
from matplotlib.artist import Artist  # type: ignore
from matplotlib.axes import Axes  # type: ignore
from matplotlib.figure import Figure  # type: ignore
from pycommons.types import type_error

import moptipy.utils.plot_defaults as pd
import moptipy.utils.plot_utils as pu
from moptipy.evaluation.axis_ranger import AxisRanger
from moptipy.evaluation.base import get_algorithm, sort_key
from moptipy.evaluation.ecdf import Ecdf, get_goal, goal_to_str
from moptipy.evaluation.styler import Styler
from moptipy.utils.lang import Lang



[docs]
def plot_ecdf(ecdfs: Iterable[Ecdf],
              figure: Axes | Figure,
              x_axis: AxisRanger | Callable[[str], AxisRanger]
              = AxisRanger.for_axis,
              y_axis: AxisRanger | Callable[[str], AxisRanger]
              = AxisRanger.for_axis,
              legend: bool = True,
              distinct_colors_func: Callable[[int], Any] =
              pd.distinct_colors,
              distinct_line_dashes_func: Callable[[int], Any] =
              pd.distinct_line_dashes,
              importance_to_line_width_func: Callable[[int], float] =
              pd.importance_to_line_width,
              importance_to_alpha_func: Callable[[int], float] =
              pd.importance_to_alpha,
              importance_to_font_size_func: Callable[[int], float] =
              pd.importance_to_font_size,
              x_grid: bool = True,
              y_grid: bool = True,
              x_label: None | str | Callable[[str], str] =
              lambda x: x if isinstance(x, str) else x[0],
              x_label_inside: bool = True,
              y_label: None | str | Callable[[str], str] =
              Lang.translate_func("ECDF"),
              y_label_inside: bool = True,
              algorithm_priority: float = 5.0,
              goal_priority: float = 0.333,
              algorithm_sort_key: Callable[[str], Any] = lambda x: x,
              goal_sort_key: Callable[[str], Any] = lambda x: x,
              algorithm_namer: Callable[[str], str] = lambda x: x,
              color_algorithms_as_fallback_group: bool = True) -> Axes:
    """
    Plot a set of ECDF functions into one chart.

    :param ecdfs: the iterable of ECDF functions
    :param figure: the figure to plot in
    :param x_axis: the x_axis ranger
    :param y_axis: the y_axis ranger
    :param legend: should we plot the legend?
    :param distinct_colors_func: the function returning the palette
    :param distinct_line_dashes_func: the function returning the line styles
    :param importance_to_line_width_func: the function converting importance
        values to line widths
    :param importance_to_alpha_func: the function converting importance
        values to alphas
    :param importance_to_font_size_func: the function converting importance
        values to font sizes
    :param x_grid: should we have a grid along the x-axis?
    :param y_grid: should we have a grid along the y-axis?
    :param x_label: a callable returning the label for the x-axis, a label
        string, or `None` if no label should be put
    :param x_label_inside: put the x-axis label inside the plot (so that
        it does not consume additional vertical space)
    :param y_label: a callable returning the label for the y-axis, a label
        string, or `None` if no label should be put
    :param y_label_inside: put the y-axis label inside the plot (so that
        it does not consume additional horizontal space)
    :param algorithm_priority: the style priority for algorithms
    :param goal_priority: the style priority for goal values
    :param algorithm_namer: the name function for algorithms receives an
        algorithm ID and returns an instance name; default=identity function
    :param color_algorithms_as_fallback_group: if only a single group of data
        was found, use algorithms as group and put them in the legend
    :param algorithm_sort_key: the sort key function for algorithms
    :param goal_sort_key: the sort key function for goals
    :returns: the axes object to allow you to add further plot elements
    """
    # Before doing anything, let's do some type checking on the parameters.
    # I want to ensure that this function is called correctly before we begin
    # to actually process the data. It is better to fail early than to deliver
    # some incorrect results.
    if not isinstance(ecdfs, Iterable):
        raise type_error(ecdfs, "ecdfs", Iterable)
    if not isinstance(figure, Axes | Figure):
        raise type_error(figure, "figure", (Axes, Figure))
    if not isinstance(legend, bool):
        raise type_error(legend, "legend", bool)
    if not callable(distinct_colors_func):
        raise type_error(
            distinct_colors_func, "distinct_colors_func", call=True)
    if not callable(distinct_line_dashes_func):
        raise type_error(
            distinct_line_dashes_func, "distinct_line_dashes_func", call=True)
    if not callable(distinct_line_dashes_func):
        raise type_error(importance_to_line_width_func,
                         "importance_to_line_width_func", call=True)
    if not callable(importance_to_alpha_func):
        raise type_error(
            importance_to_alpha_func, "importance_to_alpha_func", call=True)
    if not callable(importance_to_font_size_func):
        raise type_error(importance_to_font_size_func,
                         "importance_to_font_size_func", call=True)
    if not isinstance(x_grid, bool):
        raise type_error(x_grid, "x_grid", bool)
    if not isinstance(y_grid, bool):
        raise type_error(y_grid, "y_grid", bool)
    if not ((x_label is None) or callable(x_label)
            or isinstance(x_label, str)):
        raise type_error(x_label, "x_label", (str, None), call=True)
    if not isinstance(x_label_inside, bool):
        raise type_error(x_label_inside, "x_label_inside", bool)
    if not ((y_label is None) or callable(y_label)
            or isinstance(y_label, str)):
        raise type_error(y_label, "y_label", (str, None), call=True)
    if not isinstance(y_label_inside, bool):
        raise type_error(y_label_inside, "y_label_inside", bool)
    if not isinstance(algorithm_priority, float):
        raise type_error(algorithm_priority, "algorithm_priority", float)
    if not isfinite(algorithm_priority):
        raise ValueError(f"algorithm_priority cannot be {algorithm_priority}.")
    if not isfinite(goal_priority):
        raise ValueError(f"goal_priority cannot be {goal_priority}.")
    if not callable(algorithm_namer):
        raise type_error(algorithm_namer, "algorithm_namer", call=True)
    if not callable(algorithm_sort_key):
        raise type_error(algorithm_sort_key, "algorithm_sort_key", call=True)
    if not callable(goal_sort_key):
        raise type_error(goal_sort_key, "goal_sort_key", call=True)

    # First, we try to find groups of data to plot together in the same
    # color/style. We distinguish progress objects from statistical runs.
    goals: Final[Styler] = Styler(key_func=get_goal,
                                  namer=goal_to_str,
                                  priority=goal_priority,
                                  name_sort_function=goal_sort_key)
    algorithms: Final[Styler] = Styler(key_func=get_algorithm,
                                       namer=algorithm_namer,
                                       none_name=Lang.translate("all_algos"),
                                       priority=algorithm_priority,
                                       name_sort_function=algorithm_sort_key)
    f_dim: str | None = None
    t_dim: str | None = None
    source: list[Ecdf] = cast(list[Ecdf], ecdfs) \
        if isinstance(ecdfs, list) else list(ecdfs)
    del ecdfs

    x_labels: set[str] = set()

    # First pass: find out the goals and algorithms
    for ee in source:
        if not isinstance(ee, Ecdf):
            raise type_error(ee, "data source", Ecdf)
        goals.add(ee)
        algorithms.add(ee)
        x_labels.add(ee.time_label())

        # Validate that we have consistent time and objective units.
        if f_dim is None:
            f_dim = ee.f_name
        elif f_dim != ee.f_name:
            raise ValueError(
                f"F-units {f_dim} and {ee.f_name} do not fit!")

        if t_dim is None:
            t_dim = ee.time_unit
        elif t_dim != ee.time_unit:
            raise ValueError(
                f"Time units {t_dim} and {ee.time_unit} do not fit!")

    if f_dim is None:
        raise ValueError("f_dim cannot be None")
    if t_dim is None:
        raise ValueError("t_dim cannot be None")
    if (source is None) or (len(source) <= 0):
        raise ValueError(f"source cannot be {source}.")

    # determine the style groups
    groups: list[Styler] = []
    goals.finalize()
    algorithms.finalize()

    # pick the right sorting order
    sf: Callable[[Ecdf], Any] = sort_key
    if (goals.count > 1) and (algorithms.count == 1):
        def __x1(r: Ecdf, ssf=goal_sort_key) -> Any:
            return ssf(goal_to_str(r.goal_f))
        sf = __x1
    elif (goals.count == 1) and (algorithms.count > 1):
        def __x2(r: Ecdf, ssf=algorithm_sort_key) -> Any:
            return ssf(r.algorithm)
        sf = __x2
    elif (goals.count > 1) and (algorithms.count > 1):
        def __x3(r: Ecdf, sgs=goal_sort_key, sas=algorithm_sort_key,
                 ag=algorithm_priority > goal_priority) -> tuple[Any, Any]:
            k1 = sgs(goal_to_str(r.goal_f))
            k2 = sas(r.algorithm)
            return (k2, k1) if ag else (k1, k2)
        sf = __x3

    source.sort(key=sf)

    def __set_importance(st: Styler) -> None:
        none = 1
        not_none = 0
        none_lw = importance_to_line_width_func(none)
        not_none_lw = importance_to_line_width_func(not_none)
        st.set_line_width(lambda p: [none_lw if i <= 0 else not_none_lw
                                     for i in range(p)])
        none_a = importance_to_alpha_func(none)
        not_none_a = importance_to_alpha_func(not_none)
        st.set_line_alpha(lambda p: [none_a if i <= 0 else not_none_a
                                     for i in range(p)])

    if goals.count > 1:
        groups.append(goals)
    if algorithms.count > 1:
        groups.append(algorithms)

    if len(groups) > 0:
        groups.sort()
        groups[0].set_line_color(distinct_colors_func)

        if len(groups) > 1:
            groups[1].set_line_dash(distinct_line_dashes_func)
    elif color_algorithms_as_fallback_group:
        algorithms.set_line_color(distinct_colors_func)
        groups.append(algorithms)

    # If we only have <= 2 groups, we can mark None and not-None values with
    # different importance.
    if goals.has_none and (goals.count > 1):
        __set_importance(goals)
    elif algorithms.has_none and (algorithms.count > 1):
        __set_importance(algorithms)

    # we will collect all lines to plot in plot_list
    plot_list: list[dict] = []

    # set up the axis rangers
    if callable(x_axis):
        x_axis = x_axis(t_dim)
    if not isinstance(x_axis, AxisRanger):
        raise type_error(x_axis, "x_axis", AxisRanger)

    if callable(y_axis):
        y_axis = y_axis("ecdf")
    if not isinstance(y_axis, AxisRanger):
        raise type_error(y_axis, "y_axis", AxisRanger)

    # first we collect all ecdf object
    max_time: int | float = -inf
    max_ecdf: int | float = -inf
    max_ecdf_is_at_max_time: bool = False
    for ee in source:
        style = pd.create_line_style()
        for g in groups:
            g.add_line_style(ee, style)
        x = ee.ecdf[:, 0]
        style["x"] = x
        x_axis.register_array(x)
        y = ee.ecdf[:, 1]
        y_axis.register_array(y)
        style["y"] = y
        plot_list.append(style)

        # We need to detect the special case that the maximum time is at
        # the maximum ECDF value. In this case, we will later need to extend
        # the visible area of the x-axis.
        if len(x) < 2:
            continue
        fy = y[-2]
        ft = x[-2]
        if isfinite(ft):
            if fy >= max_ecdf:
                if fy > max_ecdf:
                    max_ecdf_is_at_max_time = (ft >= max_time)
                    max_ecdf = fy
                else:
                    max_ecdf_is_at_max_time = max_ecdf_is_at_max_time \
                        or (ft >= max_time)
            elif ft > max_time:
                max_ecdf_is_at_max_time = False
            max_time = max(max_time, ft)
    del source

    font_size_0: Final[float] = importance_to_font_size_func(0)

    # If the maximum of any ECDF is located directly at the end of the
    # x-axis, we need to slightly extend the axis to make it visible.
    if max_ecdf_is_at_max_time:
        x_axis.pad_detected_range(pad_max=True)

    # set up the graphics area
    axes: Final[Axes] = pu.get_axes(figure)
    axes.tick_params(axis="x", labelsize=font_size_0)
    axes.tick_params(axis="y", labelsize=font_size_0)

    # draw the grid
    if x_grid or y_grid:
        grid_lwd = importance_to_line_width_func(-1)
        if x_grid:
            axes.grid(axis="x", color=pd.GRID_COLOR, linewidth=grid_lwd)
        if y_grid:
            axes.grid(axis="y", color=pd.GRID_COLOR, linewidth=grid_lwd)

    max_x: float = x_axis.get_pinf_replacement()
    min_x: float | None = x_axis.get_0_replacement() \
        if x_axis.log_scale else None

    # plot the lines
    for line in plot_list:
        x = line["x"]
        changed = False
        if np.isposinf(x[-1]):
            x = x.copy()
            x[-1] = max_x
            changed = True
        if (x[0] <= 0) and (min_x is not None):
            if not changed:
                changed = True
                x = x.copy()
            x[0] = min_x
        if changed:
            line["x"] = x
        axes.step(where="post", **line)
    del plot_list

    x_axis.apply(axes, "x")
    y_axis.apply(axes, "y")

    if legend:
        handles: list[Artist] = []

        for g in groups:
            g.add_to_legend(handles.append)
            g.has_style = False

        if algorithms.has_style:
            algorithms.add_to_legend(handles.append)
        if goals.has_style:
            goals.add_to_legend(handles.append)

        axes.legend(loc="upper left",
                    handles=handles,
                    labelcolor=[art.color if hasattr(art, "color")
                                else pd.COLOR_BLACK for art in handles],
                    fontsize=font_size_0)

    pu.label_axes(axes=axes,
                  x_label=" ".join([x_label(x) for x in sorted(x_labels)])
                  if callable(x_label) else x_label,
                  x_label_inside=x_label_inside,
                  x_label_location=1,
                  y_label=y_label(f_dim) if callable(y_label) else y_label,
                  y_label_inside=y_label_inside,
                  y_label_location=0,
                  font_size=font_size_0)
    return axes