Source code for deap.cma

#    This file is part of DEAP.
#
#    DEAP is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Lesser General Public License as
#    published by the Free Software Foundation, either version 3 of
#    the License, or (at your option) any later version.
#
#    DEAP is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#    GNU Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public
#    License along with DEAP. If not, see <http://www.gnu.org/licenses/>.

#    Special thanks to Nikolaus Hansen for providing major part of
#    this code. The CMA-ES algorithm is provided in many other languages
#    and advanced versions at http://www.lri.fr/~hansen/cmaesintro.html.

"""A module that provides support for the Covariance Matrix Adaptation
Evolution Strategy.
"""
import copy
from math import sqrt, log, exp
from itertools import cycle
import warnings

import numpy

from . import tools


class Strategy(object):
    """
    A strategy that will keep track of the basic parameters of the CMA-ES
    algorithm ([Hansen2001]_).

    :param centroid: An iterable object that indicates where to start the
                     evolution.
    :param sigma: The initial standard deviation of the distribution.
    :param parameter: One or more parameters to pass to the strategy as
                      described in the following table. (optional)

    +----------------+---------------------------+----------------------------+
    | Parameter      | Default                   | Details                    |
    +================+===========================+============================+
    | ``lambda_``    | ``int(4 + 3 * log(N))``   | Number of children to      |
    |                |                           | produce at each generation,|
    |                |                           | ``N`` is the individual's  |
    |                |                           | size (integer).            |
    +----------------+---------------------------+----------------------------+
    | ``mu``         | ``int(lambda_ / 2)``      | The number of parents to   |
    |                |                           | keep from the              |
    |                |                           | lambda children (integer). |
    +----------------+---------------------------+----------------------------+
    | ``cmatrix``    | ``identity(N)``           | The initial covariance     |
    |                |                           | matrix of the distribution |
    |                |                           | that will be sampled.      |
    +----------------+---------------------------+----------------------------+
    | ``weights``    | ``"superlinear"``         | Decrease speed, can be     |
    |                |                           | ``"superlinear"``,         |
    |                |                           | ``"linear"`` or            |
    |                |                           | ``"equal"``.               |
    +----------------+---------------------------+----------------------------+
    | ``cs``         | ``(mueff + 2) /           | Cumulation constant for    |
    |                | (N + mueff + 3)``         | step-size.                 |
    +----------------+---------------------------+----------------------------+
    | ``damps``      | ``1 + 2 * max(0, sqrt((   | Damping for step-size.     |
    |                | mueff - 1) / (N + 1)) - 1)|                            |
    |                | + cs``                    |                            |
    +----------------+---------------------------+----------------------------+
    | ``ccum``       | ``4 / (N + 4)``           | Cumulation constant for    |
    |                |                           | covariance matrix.         |
    +----------------+---------------------------+----------------------------+
    | ``ccov1``      | ``2 / ((N + 1.3)^2 +      | Learning rate for rank-one |
    |                | mueff)``                  | update.                    |
    +----------------+---------------------------+----------------------------+
    | ``ccovmu``     | ``2 * (mueff - 2 + 1 /    | Learning rate for rank-mu  |
    |                | mueff) / ((N + 2)^2 +     | update.                    |
    |                | mueff)``                  |                            |
    +----------------+---------------------------+----------------------------+

    .. [Hansen2001] Hansen and Ostermeier, 2001. Completely Derandomized
       Self-Adaptation in Evolution Strategies. *Evolutionary Computation*

    """
    def __init__(self, centroid, sigma, **kargs):
        self.params = kargs

        # Create a centroid as a numpy array
        self.centroid = numpy.array(centroid)

        self.dim = len(self.centroid)
        self.sigma = sigma
        self.pc = numpy.zeros(self.dim)
        self.ps = numpy.zeros(self.dim)
        self.chiN = sqrt(self.dim) * (1 - 1. / (4. * self.dim) +
                                      1. / (21. * self.dim ** 2))

        self.C = self.params.get("cmatrix", numpy.identity(self.dim))
        self.diagD, self.B = numpy.linalg.eigh(self.C)

        indx = numpy.argsort(self.diagD)
        self.diagD = self.diagD[indx] ** 0.5
        self.B = self.B[:, indx]
        self.BD = self.B * self.diagD

        self.cond = self.diagD[indx[-1]] / self.diagD[indx[0]]

        self.lambda_ = self.params.get("lambda_", int(4 + 3 * log(self.dim)))
        self.update_count = 0
        self.computeParams(self.params)

    def generate(self, ind_init):
        r"""Generate a population of :math:`\lambda` individuals of type
        *ind_init* from the current strategy.

        :param ind_init: A function object that is able to initialize an
                         individual from a list.
        :returns: A list of individuals.
        """
        arz = numpy.random.standard_normal((self.lambda_, self.dim))
        arz = self.centroid + self.sigma * numpy.dot(arz, self.BD.T)
        return [ind_init(a) for a in arz]

    def update(self, population):
        """Update the current covariance matrix strategy from the
        *population*.

        :param population: A list of individuals from which to update the
                           parameters.
        """
        population.sort(key=lambda ind: ind.fitness, reverse=True)

        old_centroid = self.centroid
        self.centroid = numpy.dot(self.weights, population[0:self.mu])

        c_diff = self.centroid - old_centroid

        # Cumulation : update evolution path
        self.ps = (1 - self.cs) * self.ps \
            + sqrt(self.cs * (2 - self.cs) * self.mueff) / self.sigma \
            * numpy.dot(self.B, (1. / self.diagD) *
                        numpy.dot(self.B.T, c_diff))

        hsig = float((numpy.linalg.norm(self.ps) /
                      sqrt(1. - (1. - self.cs) **
                           (2. * (self.update_count + 1.))) / self.chiN <
                      (1.4 + 2. / (self.dim + 1.))))

        self.update_count += 1

        self.pc = (1 - self.cc) * self.pc + hsig \
            * sqrt(self.cc * (2 - self.cc) * self.mueff) / self.sigma \
            * c_diff

        # Update covariance matrix
        artmp = population[0:self.mu] - old_centroid
        self.C = (1 - self.ccov1 - self.ccovmu + (1 - hsig)
                  * self.ccov1 * self.cc * (2 - self.cc)) * self.C \
            + self.ccov1 * numpy.outer(self.pc, self.pc) \
            + self.ccovmu * numpy.dot((self.weights * artmp.T), artmp) \
            / self.sigma ** 2

        self.sigma *= numpy.exp((numpy.linalg.norm(self.ps) / self.chiN - 1.)
                                * self.cs / self.damps)

        self.diagD, self.B = numpy.linalg.eigh(self.C)
        indx = numpy.argsort(self.diagD)

        self.cond = self.diagD[indx[-1]] / self.diagD[indx[0]]

        self.diagD = self.diagD[indx] ** 0.5
        self.B = self.B[:, indx]
        self.BD = self.B * self.diagD

    def computeParams(self, params):
        r"""Computes the parameters depending on :math:`\lambda`. It needs to
        be called again if :math:`\lambda` changes during evolution.

        :param params: A dictionary of the manually set parameters.
        """
        self.mu = params.get("mu", int(self.lambda_ / 2))
        rweights = params.get("weights", "superlinear")
        if rweights == "superlinear":
            self.weights = log(self.mu + 0.5) - \
                numpy.log(numpy.arange(1, self.mu + 1))
        elif rweights == "linear":
            self.weights = self.mu + 0.5 - numpy.arange(1, self.mu + 1)
        elif rweights == "equal":
            self.weights = numpy.ones(self.mu)
        else:
            raise RuntimeError("Unknown weights : %s" % rweights)

        self.weights /= sum(self.weights)
        self.mueff = 1. / sum(self.weights ** 2)

        self.cc = params.get("ccum", 4. / (self.dim + 4.))
        self.cs = params.get("cs", (self.mueff + 2.) /
                             (self.dim + self.mueff + 3.))
        self.ccov1 = params.get("ccov1", 2. / ((self.dim + 1.3) ** 2 +
                                               self.mueff))
        self.ccovmu = params.get("ccovmu", 2. * (self.mueff - 2. +
                                                 1. / self.mueff) /
                                 ((self.dim + 2.) ** 2 + self.mueff))
        self.ccovmu = min(1 - self.ccov1, self.ccovmu)
        self.damps = 1. + 2. * max(0, sqrt((self.mueff - 1.) /
                                           (self.dim + 1.)) - 1.) + self.cs
        self.damps = params.get("damps", self.damps)
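
# A typical generate-update loop for this strategy -- a minimal sketch that
# assumes ``creator.Individual`` (a list subclass with a minimising fitness)
# and a ``toolbox`` with a registered ``evaluate`` function have been set up
# as in the DEAP CMA-ES examples; the numbers below are illustrative only:
#
#     strategy = Strategy(centroid=[5.0] * 10, sigma=0.5, lambda_=20)
#     toolbox.register("generate", strategy.generate, creator.Individual)
#     toolbox.register("update", strategy.update)
#
#     for gen in range(250):
#         population = toolbox.generate()
#         fitnesses = toolbox.map(toolbox.evaluate, population)
#         for ind, fit in zip(population, fitnesses):
#             ind.fitness.values = fit
#         toolbox.update(population)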

class StrategyOnePlusLambda(object):
    r"""
    A CMA-ES strategy that uses the :math:`1 + \lambda` paradigm
    ([Igel2007]_).

    :param parent: An iterable object that indicates where to start the
                   evolution. The parent requires a fitness attribute.
    :param sigma: The initial standard deviation of the distribution.
    :param lambda_: Number of offspring to produce from the parent.
                    (optional, defaults to 1)
    :param parameter: One or more parameters to pass to the strategy as
                      described in the following table. (optional)

    Other parameters can be provided as described in the next table.

    +----------------+---------------------------+----------------------------+
    | Parameter      | Default                   | Details                    |
    +================+===========================+============================+
    | ``d``          | ``1.0 + N / (2.0 *        | Damping for step-size.     |
    |                | lambda_)``                |                            |
    +----------------+---------------------------+----------------------------+
    | ``ptarg``      | ``1.0 / (5 + sqrt(lambda_)| Target success rate.       |
    |                | / 2.0)``                  |                            |
    +----------------+---------------------------+----------------------------+
    | ``cp``         | ``ptarg * lambda_ / (2.0 +| Step size learning rate.   |
    |                | ptarg * lambda_)``        |                            |
    +----------------+---------------------------+----------------------------+
    | ``cc``         | ``2.0 / (N + 2.0)``       | Cumulation time horizon.   |
    +----------------+---------------------------+----------------------------+
    | ``ccov``       | ``2.0 / (N**2 + 6.0)``    | Covariance matrix learning |
    |                |                           | rate.                      |
    +----------------+---------------------------+----------------------------+
    | ``pthresh``    | ``0.44``                  | Threshold success rate.    |
    +----------------+---------------------------+----------------------------+

    .. [Igel2007] Igel, Hansen, Roth, 2007. Covariance matrix adaptation for
       multi-objective optimization. *Evolutionary Computation*
       Spring;15(1):1-28
    """
    def __init__(self, parent, sigma, **kargs):
        self.parent = parent
        self.sigma = sigma
        self.dim = len(self.parent)

        self.C = numpy.identity(self.dim)
        self.A = numpy.identity(self.dim)

        self.pc = numpy.zeros(self.dim)

        self.computeParams(kargs)
        self.psucc = self.ptarg

    def computeParams(self, params):
        r"""Computes the parameters depending on :math:`\lambda`. It needs to
        be called again if :math:`\lambda` changes during evolution.

        :param params: A dictionary of the manually set parameters.
        """
        # Selection :
        self.lambda_ = params.get("lambda_", 1)

        # Step size control :
        self.d = params.get("d", 1.0 + self.dim / (2.0 * self.lambda_))
        self.ptarg = params.get("ptarg", 1.0 / (5 + sqrt(self.lambda_) / 2.0))
        self.cp = params.get("cp", self.ptarg * self.lambda_ /
                             (2 + self.ptarg * self.lambda_))

        # Covariance matrix adaptation
        self.cc = params.get("cc", 2.0 / (self.dim + 2.0))
        self.ccov = params.get("ccov", 2.0 / (self.dim ** 2 + 6.0))
        self.pthresh = params.get("pthresh", 0.44)

    def generate(self, ind_init):
        r"""Generate a population of :math:`\lambda` individuals of type
        *ind_init* from the current strategy.

        :param ind_init: A function object that is able to initialize an
                         individual from a list.
        :returns: A list of individuals.
        """
        arz = numpy.random.standard_normal((self.lambda_, self.dim))
        arz = self.parent + self.sigma * numpy.dot(arz, self.A.T)
        return [ind_init(a) for a in arz]

    def update(self, population):
        """Update the current covariance matrix strategy from the
        *population*.

        :param population: A list of individuals from which to update the
                           parameters.
        """
        population.sort(key=lambda ind: ind.fitness, reverse=True)
        lambda_succ = sum(self.parent.fitness <= ind.fitness
                          for ind in population)
        p_succ = float(lambda_succ) / self.lambda_
        self.psucc = (1 - self.cp) * self.psucc + self.cp * p_succ

        if self.parent.fitness <= population[0].fitness:
            x_step = (population[0] - numpy.array(self.parent)) / self.sigma
            self.parent = copy.deepcopy(population[0])
            if self.psucc < self.pthresh:
                self.pc = (1 - self.cc) * self.pc + \
                    sqrt(self.cc * (2 - self.cc)) * x_step
                self.C = (1 - self.ccov) * self.C + \
                    self.ccov * numpy.outer(self.pc, self.pc)
            else:
                self.pc = (1 - self.cc) * self.pc
                self.C = (1 - self.ccov) * self.C + self.ccov * \
                    (numpy.outer(self.pc, self.pc) +
                     self.cc * (2 - self.cc) * self.C)

        self.sigma = self.sigma * exp(1.0 / self.d *
                                      (self.psucc - self.ptarg) /
                                      (1.0 - self.ptarg))

        # We use Cholesky since, for now, we have no use for the eigen
        # decomposition. Cholesky returns a matrix A such that C = A * A.T,
        # while the eigen decomposition returns two matrices B and D^2 such
        # that C = B * D^2 * B.T = B * D * D * B.T, so A == B * D.
        # To compute a new individual we need to multiply each vector z by A,
        # as y = centroid + sigma * A * z. Cholesky is therefore more direct:
        # we neither take the square root of D^2 nor multiply B and D to get
        # A, we obtain A directly. This cannot be done (without extra cost)
        # in the standard CMA-ES, where the eigen decomposition is needed to
        # compute the covariance matrix inverse in the step-size evolution
        # path computation.
        self.A = numpy.linalg.cholesky(self.C)
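
# A minimal sketch of driving this (1 + lambda) strategy directly -- here
# ``creator.Individual`` and an ``evaluate`` function are assumed to exist as
# in the DEAP examples, and the parent is evaluated before the loop starts:
#
#     parent = creator.Individual([5.0] * 10)
#     parent.fitness.values = evaluate(parent)
#
#     strategy = StrategyOnePlusLambda(parent, sigma=0.5, lambda_=8)
#     for gen in range(250):
#         offspring = strategy.generate(creator.Individual)
#         for ind in offspring:
#             ind.fitness.values = evaluate(ind)
#         strategy.update(offspring)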

class StrategyMultiObjective(object):
    """Multiobjective CMA-ES strategy based on the paper [Voss2010]_. It
    is used similarly to the standard CMA-ES strategy, with a generate-update
    scheme.

    :param population: An initial population of individuals.
    :param sigma: The initial step size of the complete system.
    :param mu: The number of parents to use in the evolution. When not
               provided it defaults to the length of *population*. (optional)
    :param lambda_: The number of offspring to produce at each generation.
                    (optional, defaults to 1)
    :param indicator: The indicator function to use. (optional, defaults to
                      :func:`~deap.tools.hypervolume`)

    Other parameters can be provided as described in the next table.

    +----------------+---------------------------+----------------------------+
    | Parameter      | Default                   | Details                    |
    +================+===========================+============================+
    | ``d``          | ``1.0 + N / 2.0``         | Damping for step-size.     |
    +----------------+---------------------------+----------------------------+
    | ``ptarg``      | ``1.0 / (5 + 1.0 / 2.0)`` | Target success rate.       |
    +----------------+---------------------------+----------------------------+
    | ``cp``         | ``ptarg / (2.0 + ptarg)`` | Step size learning rate.   |
    +----------------+---------------------------+----------------------------+
    | ``cc``         | ``2.0 / (N + 2.0)``       | Cumulation time horizon.   |
    +----------------+---------------------------+----------------------------+
    | ``ccov``       | ``2.0 / (N**2 + 6.0)``    | Covariance matrix learning |
    |                |                           | rate.                      |
    +----------------+---------------------------+----------------------------+
    | ``pthresh``    | ``0.44``                  | Threshold success rate.    |
    +----------------+---------------------------+----------------------------+

    .. [Voss2010] Voss, Hansen, Igel, "Improved Step Size Adaptation
       for the MO-CMA-ES", 2010.
    """
    def __init__(self, population, sigma, **params):
        self.parents = population
        self.dim = len(self.parents[0])

        # Selection
        self.mu = params.get("mu", len(self.parents))
        self.lambda_ = params.get("lambda_", 1)

        # Step size control
        self.d = params.get("d", 1.0 + self.dim / 2.0)
        self.ptarg = params.get("ptarg", 1.0 / (5.0 + 0.5))
        self.cp = params.get("cp", self.ptarg / (2.0 + self.ptarg))

        # Covariance matrix adaptation
        self.cc = params.get("cc", 2.0 / (self.dim + 2.0))
        self.ccov = params.get("ccov", 2.0 / (self.dim ** 2 + 6.0))
        self.pthresh = params.get("pthresh", 0.44)

        # Internal parameters associated to the mu parents
        self.sigmas = [sigma] * len(population)
        # Lower Cholesky matrix (Sampling matrix)
        self.A = [numpy.identity(self.dim) for _ in range(len(population))]
        # Inverse Cholesky matrix (Used in the update of A)
        self.invCholesky = [numpy.identity(self.dim)
                            for _ in range(len(population))]
        self.pc = [numpy.zeros(self.dim) for _ in range(len(population))]
        self.psucc = [self.ptarg] * len(population)

        self.indicator = params.get("indicator", tools.hypervolume)

    def generate(self, ind_init):
        r"""Generate a population of :math:`\lambda` individuals of type
        *ind_init* from the current strategy.

        :param ind_init: A function object that is able to initialize an
                         individual from a list.
        :returns: A list of individuals with a private attribute :attr:`_ps`.
                  This last attribute is essential to the update function, it
                  indicates that the individual is an offspring and the index
                  of its parent.
        """
        arz = numpy.random.randn(self.lambda_, self.dim)
        individuals = list()

        # Make sure every parent has a parent tag and index
        for i, p in enumerate(self.parents):
            p._ps = "p", i

        # Each parent produces an offspring
        if self.lambda_ == self.mu:
            for i in range(self.lambda_):
                individuals.append(ind_init(self.parents[i] + self.sigmas[i] *
                                            numpy.dot(self.A[i], arz[i])))
                individuals[-1]._ps = "o", i

        # Parents producing an offspring are chosen at random from the first
        # front
        else:
            ndom = tools.sortLogNondominated(self.parents, len(self.parents),
                                             first_front_only=True)
            for i in range(self.lambda_):
                j = numpy.random.randint(0, len(ndom))
                _, p_idx = ndom[j]._ps
                individuals.append(ind_init(self.parents[p_idx] +
                                            self.sigmas[p_idx] *
                                            numpy.dot(self.A[p_idx], arz[i])))
                individuals[-1]._ps = "o", p_idx

        return individuals

    def _select(self, candidates):
        if len(candidates) <= self.mu:
            return candidates, []

        pareto_fronts = tools.sortLogNondominated(candidates, len(candidates))

        chosen = list()
        mid_front = None
        not_chosen = list()

        # Fill the next population (chosen) with the fronts until there is
        # not enough space. When an entire front does not fit in the space
        # left we rely on the hypervolume for this front. The remaining
        # fronts are explicitly not chosen.
        full = False
        for front in pareto_fronts:
            if len(chosen) + len(front) <= self.mu and not full:
                chosen += front
            elif mid_front is None and len(chosen) < self.mu:
                mid_front = front
                # With this front, we selected enough individuals
                full = True
            else:
                not_chosen += front

        # Separate the mid front to accept only k individuals
        k = self.mu - len(chosen)
        if k > 0:
            # The reference point is chosen in the complete population
            # as the worst in each dimension + 1
            ref = numpy.array([ind.fitness.wvalues for ind in candidates]) * -1
            ref = numpy.max(ref, axis=0) + 1

            for _ in range(len(mid_front) - k):
                idx = self.indicator(mid_front, ref=ref)
                not_chosen.append(mid_front.pop(idx))

            chosen += mid_front

        return chosen, not_chosen

    def _rankOneUpdate(self, invCholesky, A, alpha, beta, v):
        w = numpy.dot(invCholesky, v)

        # Under this threshold, the update is mostly noise
        if w.max() > 1e-20:
            w_inv = numpy.dot(w, invCholesky)
            norm_w2 = numpy.sum(w ** 2)
            a = sqrt(alpha)
            root = numpy.sqrt(1 + beta / alpha * norm_w2)
            b = a / norm_w2 * (root - 1)

            A = a * A + b * numpy.outer(v, w)
            invCholesky = 1.0 / a * invCholesky - \
                b / (a ** 2 + a * b * norm_w2) * numpy.outer(w, w_inv)

        return invCholesky, A
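
    # Note on _rankOneUpdate: it applies the rank-one covariance update
    # C' = alpha * C + beta * v * v^T directly on the Cholesky factor A
    # (with C = A * A^T) and on its inverse. Writing w = A^-1 * v, the
    # factors used above follow from
    #     A' = sqrt(alpha) * A
    #          + sqrt(alpha) / ||w||^2
    #            * (sqrt(1 + beta / alpha * ||w||^2) - 1) * v * w^T,
    # so the full covariance matrix never has to be formed or re-factorised
    # in the update below.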

    def update(self, population):
        """Update the current covariance matrix strategies from the
        *population*.

        :param population: A list of individuals from which to update the
                           parameters.
        """
        chosen, not_chosen = self._select(population + self.parents)

        cp, cc, ccov = self.cp, self.cc, self.ccov
        d, ptarg, pthresh = self.d, self.ptarg, self.pthresh

        # Make copies for chosen offspring only
        last_steps = [self.sigmas[ind._ps[1]] if ind._ps[0] == "o" else None
                      for ind in chosen]
        sigmas = [self.sigmas[ind._ps[1]] if ind._ps[0] == "o" else None
                  for ind in chosen]
        invCholesky = [self.invCholesky[ind._ps[1]].copy()
                       if ind._ps[0] == "o" else None for ind in chosen]
        A = [self.A[ind._ps[1]].copy() if ind._ps[0] == "o" else None
             for ind in chosen]
        pc = [self.pc[ind._ps[1]].copy() if ind._ps[0] == "o" else None
              for ind in chosen]
        psucc = [self.psucc[ind._ps[1]] if ind._ps[0] == "o" else None
                 for ind in chosen]

        # Update the internal parameters for successful offspring
        for i, ind in enumerate(chosen):
            t, p_idx = ind._ps

            # Only the offspring update the parameter set
            if t == "o":
                # Update (Success = 1 since it is chosen)
                psucc[i] = (1.0 - cp) * psucc[i] + cp
                sigmas[i] = sigmas[i] * exp((psucc[i] - ptarg) /
                                            (d * (1.0 - ptarg)))

                if psucc[i] < pthresh:
                    xp = numpy.array(ind)
                    x = numpy.array(self.parents[p_idx])
                    pc[i] = (1.0 - cc) * pc[i] + \
                        sqrt(cc * (2.0 - cc)) * (xp - x) / last_steps[i]
                    invCholesky[i], A[i] = self._rankOneUpdate(
                        invCholesky[i], A[i], 1 - ccov, ccov, pc[i])
                else:
                    pc[i] = (1.0 - cc) * pc[i]
                    pc_weight = cc * (2.0 - cc)
                    invCholesky[i], A[i] = self._rankOneUpdate(
                        invCholesky[i], A[i], 1 - ccov + pc_weight, ccov,
                        pc[i])

                self.psucc[p_idx] = (1.0 - cp) * self.psucc[p_idx] + cp
                self.sigmas[p_idx] = self.sigmas[p_idx] * \
                    exp((self.psucc[p_idx] - ptarg) / (d * (1.0 - ptarg)))

        # It is unnecessary to update the entire parameter set for not chosen
        # individuals. Their parameters will not make it to the next
        # generation.
        for ind in not_chosen:
            t, p_idx = ind._ps

            # Only the offspring update the parameter set
            if t == "o":
                self.psucc[p_idx] = (1.0 - cp) * self.psucc[p_idx]
                self.sigmas[p_idx] = self.sigmas[p_idx] * \
                    exp((self.psucc[p_idx] - ptarg) / (d * (1.0 - ptarg)))

        # Make a copy of the internal parameters.
        # The parameter is in the temporary variable for offspring and in
        # the original one for parents.
        self.parents = chosen
        self.sigmas = [sigmas[i] if ind._ps[0] == "o"
                       else self.sigmas[ind._ps[1]]
                       for i, ind in enumerate(chosen)]
        self.invCholesky = [invCholesky[i] if ind._ps[0] == "o"
                            else self.invCholesky[ind._ps[1]]
                            for i, ind in enumerate(chosen)]
        self.A = [A[i] if ind._ps[0] == "o" else self.A[ind._ps[1]]
                  for i, ind in enumerate(chosen)]
        self.pc = [pc[i] if ind._ps[0] == "o" else self.pc[ind._ps[1]]
                   for i, ind in enumerate(chosen)]
        self.psucc = [psucc[i] if ind._ps[0] == "o"
                      else self.psucc[ind._ps[1]]
                      for i, ind in enumerate(chosen)]
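
# A minimal sketch of the MO-CMA-ES generate-update loop -- it assumes a
# multi-objective minimising ``creator.Individual`` and an ``evaluate``
# function returning one value per objective; sizes are illustrative only:
#
#     population = [creator.Individual(numpy.random.uniform(-1, 1, 10))
#                   for _ in range(10)]
#     for ind in population:
#         ind.fitness.values = evaluate(ind)
#
#     strategy = StrategyMultiObjective(population, sigma=0.5,
#                                       mu=10, lambda_=10)
#     for gen in range(100):
#         offspring = strategy.generate(creator.Individual)
#         for ind in offspring:
#             ind.fitness.values = evaluate(ind)
#         strategy.update(offspring)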

class StrategyActiveOnePlusLambda(object):
    r"""A CMA-ES strategy that combines the :math:`(1 + \lambda)` paradigm
    [Igel2007]_, the mixed integer modification [Hansen2011]_, active
    covariance update [Arnold2010]_ and constraint handling [Arnold2012]_.

    This version of CMA-ES requires the random vector and the mutation
    that created each individual. The vector and mutation are stored in each
    individual as :attr:`_z` and :attr:`_y` respectively. Updating with
    individuals not containing these attributes will result in an
    :class:`AttributeError`.

    Notes:
        When using this strategy (especially when using constraints) you
        should monitor the strategy :attr:`condition_number`. If it goes
        above a given threshold (say :math:`10^{12}`), you should consider
        restarting the optimization, as the covariance matrix is becoming
        degenerate. See the constrained active CMA-ES example for a simple
        example of restart.

    :param parent: An iterable object that indicates where to start the
                   evolution. The parent requires a fitness attribute.
    :param sigma: The initial standard deviation of the distribution.
    :param steps: The minimal step size for each dimension. Use 0 for
                  continuous dimensions.
    :param lambda_: Number of offspring to produce from the parent.
                    (optional, defaults to 1)
    :param **kargs: One or more parameters to pass to the strategy as
                    described in the following table. (optional)

    +----------------+---------------------------+------------------------------+
    | Parameter      | Default                   | Details                      |
    +================+===========================+==============================+
    | ``d``          | ``1.0 + N / (2.0 *        | Damping for step-size.       |
    |                | lambda_)``                |                              |
    +----------------+---------------------------+------------------------------+
    | ``ptarg``      | ``1.0 / (5 + sqrt(lambda_)| Target success rate          |
    |                | / 2.0)``                  | (from 1 + lambda algorithm). |
    +----------------+---------------------------+------------------------------+
    | ``cp``         | ``ptarg * lambda_ / (2.0 +| Step size learning rate.     |
    |                | ptarg * lambda_)``        |                              |
    +----------------+---------------------------+------------------------------+
    | ``cc``         | ``2.0 / (N + 2.0)``       | Cumulation time horizon.     |
    +----------------+---------------------------+------------------------------+
    | ``ccovp``      | ``2.0 / (N**2 + 6.0)``    | Covariance matrix positive   |
    |                |                           | learning rate.               |
    +----------------+---------------------------+------------------------------+
    | ``ccovn``      | ``0.4 / (N**1.6 + 1.0)``  | Covariance matrix negative   |
    |                |                           | learning rate.               |
    +----------------+---------------------------+------------------------------+
    | ``cconst``     | ``1.0 / (N + 2.0)``       | Constraint vectors learning  |
    |                |                           | rate.                        |
    +----------------+---------------------------+------------------------------+
    | ``beta``       | ``0.1 / (lambda_ * (N +   | Covariance matrix learning   |
    |                | 2.0))``                   | rate for constraints.        |
    +----------------+---------------------------+------------------------------+
    | ``pthresh``    | ``0.44``                  | Threshold success rate.      |
    +----------------+---------------------------+------------------------------+

    .. [Igel2007] Igel, Hansen and Roth. Covariance matrix adaptation for
       multi-objective optimization. 2007

    .. [Arnold2010] Arnold and Hansen. Active covariance matrix adaptation
       for the (1+1)-CMA-ES. 2010.

    .. [Hansen2011] Hansen. A CMA-ES for Mixed-Integer Nonlinear
       Optimization. Research Report RR-7751, INRIA. 2011

    .. [Arnold2012] Arnold and Hansen. A (1+1)-CMA-ES for Constrained
       Optimisation. 2012
    """
    def __init__(self, parent, sigma, steps, **kargs):
        self.parent = parent
        self.sigma = sigma
        self.dim = len(self.parent)

        self.A = numpy.identity(self.dim)
        self.invA = numpy.identity(self.dim)
        self.condition_number = numpy.linalg.cond(self.A)

        self.pc = numpy.zeros(self.dim)

        # Save parameters
        self.params = kargs.copy()

        # Covariance matrix adaptation
        self.cc = self.params.get("cc", 2.0 / (self.dim + 2.0))
        self.ccovp = self.params.get("ccovp", 2.0 / (self.dim ** 2 + 6.0))
        self.ccovn = self.params.get("ccovn", 0.4 / (self.dim ** 1.6 + 1.0))
        self.cconst = self.params.get("cconst", 1.0 / (self.dim + 2.0))
        self.pthresh = self.params.get("pthresh", 0.44)

        self.lambda_ = self.params.get("lambda_", 1)

        self.psucc = self.ptarg
        self.S_int = numpy.array(steps)
        self.i_I_R = numpy.flatnonzero(2 * self.sigma *
                                       numpy.diag(self.A) ** 0.5 < self.S_int)

        self.constraint_vecs = None
        self.ancestors_fitness = list()

    @property
    def lambda_(self):
        return self._lambda

    @lambda_.setter
    def lambda_(self, value):
        self._lambda = value
        self._compute_lambda_parameters()

    def _compute_lambda_parameters(self):
        r"""Computes the parameters depending on :math:`\lambda`. It needs
        to be called again if :math:`\lambda` changes during evolution.
        """
        # Step size control :
        self.d = self.params.get("d", 1.0 + self.dim / (2.0 * self.lambda_))
        self.ptarg = self.params.get("ptarg", 1.0 / (5 +
                                                     numpy.sqrt(self.lambda_)
                                                     / 2.0))
        self.cp = self.params.get("cp", (self.ptarg * self.lambda_ /
                                         (2 + self.ptarg * self.lambda_)))

        self.beta = self.params.get("beta", 0.1 / (self.lambda_ *
                                                   (self.dim + 2.0)))

    def generate(self, ind_init):
        r"""Generate a population of :math:`\lambda` individuals of type
        *ind_init* from the current strategy.

        :param ind_init: A function object that is able to initialize an
                         individual from a list.
        :returns: A list of individuals.
        """
        # Generate individuals
        z = numpy.random.standard_normal((self.lambda_, self.dim))
        y = numpy.dot(self.A, z.T).T
        x = self.parent + self.sigma * y + self.S_int * self._integer_mutation()

        if any(self.S_int > 0):
            # Bring values to the integer steps
            round_values = numpy.tile(self.S_int > 0, (self.lambda_, 1))
            steps = numpy.tile(self.S_int, (self.lambda_, 1))
            x[round_values] = steps[round_values] * \
                numpy.around(x[round_values] / steps[round_values])

        # The update method requires to remember the y of each individual
        population = list(map(ind_init, x))
        for ind, yi, zi in zip(population, y, z):
            ind._y = yi
            ind._z = zi

        return population

    def _integer_mutation(self):
        n_I_R = self.i_I_R.shape[0]

        # Mixed integer CMA-ES is developed for (mu/mu, lambda); we have a
        # (1 + lambda) setting, thus we make the integer mutation
        # probabilistic. The integer mutation is lambda / 2 if all dimensions
        # are integers or min(lambda / 2 - 1, lambda / 10 + n_I_R + 1). The
        # minus 1 accounts for the last new candidate getting its integer
        # mutation from the last best solution. We skip this last best
        # solution part.
        if n_I_R == 0:
            return numpy.zeros((self.lambda_, self.dim))
        elif n_I_R == self.dim:
            p = self.lambda_ / 2.0 / self.lambda_
            # lambda_int = int(numpy.floor(self.lambda_ / 2))
        else:
            p = (min(self.lambda_ / 2.0,
                     self.lambda_ / 10.0 + n_I_R / self.dim) / self.lambda_)
            # lambda_int = int(min(numpy.floor(self.lambda_ / 10) + n_I_R + 1,
            #                      numpy.floor(self.lambda_ / 2) - 1))

        Rp = numpy.zeros((self.lambda_, self.dim))
        Rpp = numpy.zeros((self.lambda_, self.dim))

        # Ri' has exactly one of its components set to one. The Ri' are
        # dependent in that the number of mutations for each coordinate
        # differs at most by one.
        for i, j in zip(range(self.lambda_), cycle(self.i_I_R)):
            # Probabilistically choose lambda_int individuals
            if numpy.random.rand() < p:
                Rp[i, j] = 1
                Rpp[i, j] = numpy.random.geometric(p=0.7 ** (1.0 / n_I_R)) - 1

        I_pm1 = (-1) ** numpy.random.randint(0, 2, (self.lambda_, self.dim))
        R_int = I_pm1 * (Rp + Rpp)

        # Usually in (mu/mu, lambda) the last individual is set to the step
        # taken. We don't use this scheme in the 1 + lambda setting.
        # if self.update_count > 0:
        #     R_int[-1, :] = (numpy.floor(-self.S_int - self.last_best)
        #                     - numpy.floor(-self.S_int - self.centroid))

        return R_int

    def _rank1update(self, individual, p_succ):
        update_cov = False
        self.psucc = (1 - self.cp) * self.psucc + self.cp * p_succ

        if not hasattr(self.parent, "fitness") \
                or self.parent.fitness <= individual.fitness:
            self.parent = copy.deepcopy(individual)
            self.ancestors_fitness.append(copy.deepcopy(individual.fitness))
            if len(self.ancestors_fitness) > 5:
                self.ancestors_fitness.pop()

            # Must guard if pc is all 0 to prevent w_norm_sqrd from being 0
            if self.psucc < self.pthresh or numpy.allclose(self.pc, 0):
                self.pc = (1 - self.cc) * self.pc + \
                    (numpy.sqrt(self.cc * (2 - self.cc)) * individual._y)

                a = numpy.sqrt(1 - self.ccovp)
                w = numpy.dot(self.invA, self.pc)
                w_norm_sqrd = numpy.linalg.norm(w) ** 2
                b = numpy.sqrt(1 - self.ccovp) / w_norm_sqrd \
                    * (numpy.sqrt(1 + self.ccovp / (1 - self.ccovp)
                                  * w_norm_sqrd) - 1)
            else:
                self.pc = (1 - self.cc) * self.pc
                d = self.ccovp * (1 + self.cc * (2 - self.cc))
                a = numpy.sqrt(1 - d)
                w = numpy.dot(self.invA, self.pc)
                w_norm_sqrd = numpy.linalg.norm(w) ** 2
                b = numpy.sqrt(1 - d) \
                    * (numpy.sqrt(1 + self.ccovp * w_norm_sqrd / (1 - d)) - 1) \
                    / w_norm_sqrd

            update_cov = True
        elif len(self.ancestors_fitness) >= 5 \
                and individual.fitness < self.ancestors_fitness[0] \
                and self.psucc < self.pthresh:
            # Active covariance update requires w = z and not w = inv(A)s
            w = individual._z
            w_norm_sqrd = numpy.linalg.norm(w) ** 2
            if 1 < self.ccovn * (2 * w_norm_sqrd - 1):
                ccovn = 1 / (2 * w_norm_sqrd - 1)
            else:
                ccovn = self.ccovn

            a = numpy.sqrt(1 + ccovn)
            b = numpy.sqrt(1 + ccovn) / w_norm_sqrd \
                * (numpy.sqrt(1 - ccovn / (1 + ccovn) * w_norm_sqrd) - 1)
            update_cov = True

        if update_cov:
            self.A = self.A * a + b * numpy.outer(numpy.dot(self.A, w), w)
            self.invA = (1 / a * self.invA
                         - b / (a ** 2 + a * b * w_norm_sqrd)
                         * numpy.dot(self.invA, numpy.outer(w, w)))

        # TODO: Add integer mutation i_I_R component
        self.sigma = self.sigma * numpy.exp(1.0 / self.d
                                            * ((self.psucc - self.ptarg)
                                               / (1.0 - self.ptarg)))

    def _infeasible_update(self, individual):
        if not hasattr(individual.fitness, "constraint_violation"):
            return

        if self.constraint_vecs is None:
            shape = len(individual.fitness.constraint_violation), self.dim
            self.constraint_vecs = numpy.zeros(shape)

        for i in range(self.constraint_vecs.shape[0]):
            if individual.fitness.constraint_violation[i]:
                self.constraint_vecs[i] = \
                    (1 - self.cconst) * self.constraint_vecs[i] \
                    + self.cconst * individual._y

        W = numpy.dot(self.invA, self.constraint_vecs.T).T    # M x N
        constraint_violation = numpy.sum(
            individual.fitness.constraint_violation)

        A_prime = (
            self.A - self.beta / constraint_violation
            * numpy.sum(
                list(
                    numpy.outer(self.constraint_vecs[i], W[i])
                    / numpy.dot(W[i], W[i])
                    for i in range(self.constraint_vecs.shape[0])
                    if individual.fitness.constraint_violation[i]
                ),
                axis=0
            )
        )

        try:
            self.invA = numpy.linalg.inv(A_prime)
        except numpy.linalg.LinAlgError:
            warnings.warn("Singular matrix inversion, "
                          "invalid update in CMA-ES ignored", RuntimeWarning)
        else:
            self.A = A_prime

    def update(self, population):
        """Update the current covariance matrix strategy from the
        *population*.

        :param population: A list of individuals from which to update the
                           parameters.
        """
        valid_population = [ind for ind in population if ind.fitness.valid]
        invalid_population = [ind for ind in population
                              if not ind.fitness.valid]

        if len(valid_population) > 0:
            # Rank 1 update
            valid_population.sort(key=lambda ind: ind.fitness, reverse=True)
            if not hasattr(self.parent, "fitness"):
                lambda_succ = len(valid_population)
            else:
                lambda_succ = sum(self.parent.fitness <= ind.fitness
                                  for ind in valid_population)
            # Use len(valid_population) to not account for individuals
            # violating constraints
            self._rank1update(valid_population[0],
                              float(lambda_succ) / len(valid_population))

        if len(invalid_population) > 0:
            # Learn constraints from all invalid individuals
            for ind in invalid_population:
                self._infeasible_update(ind)

        # Used to monitor the covariance matrix conditioning
        self.condition_number = numpy.linalg.cond(self.A)

        C = numpy.dot(self.A, self.A.T)
        self.i_I_R = numpy.flatnonzero(2 * self.sigma *
                                       numpy.diag(C) ** 0.5 < self.S_int)