Program Listing for File minibatchOptimization.h
↰ Return to documentation for file (include/parpeoptimization/minibatchOptimization.h)
#ifndef PARPE_OPTIMIZATION_MINIBATCH_OPTIMIZATION_H
#define PARPE_OPTIMIZATION_MINIBATCH_OPTIMIZATION_H
#include <parpeoptimization/optimizationOptions.h>
#include <parpeoptimization/optimizationProblem.h>
#include <parpeoptimization/optimizationResultWriter.h>
#include <parpecommon/misc.h>
#include <parpecommon/model.h>
#include <memory>
#include <utility>
#include <vector>
#include <cmath>
#include <random>
#include <algorithm>
#include <iostream>
#include <sstream>
#include <functional>
#include <numeric>
#include <gsl/gsl-lite.hpp>
namespace parpe {
enum class minibatchExitStatus {
gradientNormConvergence, maxEpochsExceeded, invalidNumber
};
enum class learningRateInterp {
linear, inverseLinear, logarithmic
};
enum class interceptType {
none, reduceStep, reduceStepAndRestart
};
template<typename T>
class MinibatchOptimizationProblem: public OptimizationProblem {
public:
MinibatchOptimizationProblem() = default;
MinibatchOptimizationProblem(std::unique_ptr<SummedGradientFunction<T>> costFun,
std::unique_ptr<Logger> logger) :
OptimizationProblem(std::move(costFun), std::move(logger)) {
}
MinibatchOptimizationProblem(MinibatchOptimizationProblem const& other) = delete;
~MinibatchOptimizationProblem() override = default;
virtual std::vector<T> getTrainingData() const = 0;
SummedGradientFunction<T>* getGradientFunction() const {
auto summedGradientFunction = dynamic_cast<SummedGradientFunction<T>*>(cost_fun_.get());
Ensures(summedGradientFunction != nullptr);
return summedGradientFunction;
}
};
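/* Usage sketch (illustrative, not part of this header): a concrete problem
 * derives from MinibatchOptimizationProblem<T> and provides the training data,
 * typically a list of data-point indices. The class name below is hypothetical,
 * and any remaining pure-virtual members inherited from OptimizationProblem
 * would also have to be implemented.
 *
 *   class MyMinibatchProblem : public parpe::MinibatchOptimizationProblem<int> {
 *   public:
 *       explicit MyMinibatchProblem(int numDataPoints)
 *           : dataIndices_(numDataPoints) {
 *           std::iota(dataIndices_.begin(), dataIndices_.end(), 0);
 *       }
 *       std::vector<int> getTrainingData() const override { return dataIndices_; }
 *   private:
 *       std::vector<int> dataIndices_;
 *   };
 */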
class LearningRateUpdater {
public:
LearningRateUpdater(int maxEpochs,
learningRateInterp learningRateInterpMode);
void updateLearningRate(int iteration);
void reduceLearningRate();
void increaseLearningRate();
double getCurrentLearningRate();
void setReductionFactor(double newReductionFactor);
void setMaxEpochs(int newMaxEpochs);
void setStartLearningRate(double learningRate);
void setEndLearningRate(double learningRate);
private:
int maxEpochs = 0;
double currentLearningRate = 0.0;
double reductionFactor = 4.0;
double startLearningRate = 0.1;
double endLearningRate = 0.001;
learningRateInterp learningRateInterpMode = learningRateInterp::linear;
};
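/* Note (assumption, purely illustrative): the updater interpolates the learning
 * rate between startLearningRate and endLearningRate over maxEpochs epochs
 * according to learningRateInterpMode; for the linear mode one would expect a
 * schedule roughly like
 *
 *   currentLearningRate = startLearningRate
 *       - (startLearningRate - endLearningRate) * epoch / (maxEpochs - 1);
 *
 * reduceLearningRate()/increaseLearningRate() additionally shrink or re-grow
 * the rate by reductionFactor when the rescue interceptor is active. The exact
 * formulas are defined in the accompanying .cpp file.
 */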
class ParameterUpdater {
public:
virtual void updateParameters(double learningRate,
int iteration,
gsl::span<const double> gradient,
gsl::span<double> parameters,
gsl::span<const double> lowerBounds = gsl::span<const double>(),
gsl::span<const double> upperBounds = gsl::span<const double>()) = 0;
virtual void undoLastStep() = 0;
virtual void clearCache() = 0;
virtual void initialize(unsigned int numParameters) = 0;
virtual ~ParameterUpdater() = default;
};
class ParameterUpdaterVanilla: public ParameterUpdater {
public:
ParameterUpdaterVanilla() = default;
void updateParameters(double learningRate,
int iteration,
gsl::span<const double> gradient,
gsl::span<double> parameters,
gsl::span<const double> lowerBounds = gsl::span<const double>(),
gsl::span<const double> upperBounds = gsl::span<const double>()) override;
void undoLastStep() override;
void clearCache() override;
void initialize(unsigned int numParameters) override;
};
class ParameterUpdaterRmsProp: public ParameterUpdater {
public:
ParameterUpdaterRmsProp() = default;
void updateParameters(double learningRate,
int iteration,
gsl::span<const double> gradient,
gsl::span<double> parameters,
gsl::span<const double> lowerBounds = gsl::span<const double>(),
gsl::span<const double> upperBounds = gsl::span<const double>()) override;
void undoLastStep() override;
void clearCache() override;
void initialize(unsigned int numParameters) override;
private:
double decayRate = 0.9;
double delta = 1e-7;
std::vector<double> gradientNormCache;
std::vector<double> oldGradientNormCache;
};
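/* For reference: the textbook RMSProp update, which the member names above
 * suggest; this mapping is an assumption, see the accompanying .cpp for the
 * actual implementation. For each parameter i:
 *
 *   gradientNormCache[i] = decayRate * gradientNormCache[i]
 *                          + (1 - decayRate) * gradient[i] * gradient[i];
 *   parameters[i] -= learningRate * gradient[i]
 *                    / (std::sqrt(gradientNormCache[i]) + delta);
 */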
class ParameterUpdaterMomentum: public ParameterUpdater {
public:
ParameterUpdaterMomentum() = default;
void updateParameters(double learningRate,
int iteration,
gsl::span<const double> gradient,
gsl::span<double> parameters,
gsl::span<const double> lowerBounds = gsl::span<const double>(),
gsl::span<const double> upperBounds = gsl::span<const double>()) override;
void undoLastStep() override;
void clearCache() override;
void initialize(unsigned int numParameters) override;
private:
double decayRate = 0.8;
std::vector<double> momentum;
std::vector<double> oldMomentum;
};
class ParameterUpdaterAdam: public ParameterUpdater {
public:
ParameterUpdaterAdam() = default;
void updateParameters(double learningRate,
int iteration,
gsl::span<const double> gradient,
gsl::span<double> parameters,
gsl::span<const double> lowerBounds = gsl::span<const double>(),
gsl::span<const double> upperBounds = gsl::span<const double>()) override;
void undoLastStep() override;
void clearCache() override;
void initialize(unsigned int numParameters) override;
private:
double decayRateGradient = 0.9;
double decayRateGradientNorm = 0.9;
double delta = 1e-7;
std::vector<double> gradientNormCache;
std::vector<double> oldGradientNormCache;
std::vector<double> gradientCache;
std::vector<double> oldGradientCache;
};
class ParameterUpdaterAdamClassic: public ParameterUpdater {
public:
ParameterUpdaterAdamClassic() = default;
void updateParameters(double learningRate,
int iteration,
gsl::span<const double> gradient,
gsl::span<double> parameters,
gsl::span<const double> lowerBounds = gsl::span<const double>(),
gsl::span<const double> upperBounds = gsl::span<const double>()) override;
void undoLastStep() override;
void clearCache() override;
void initialize(unsigned int numParameters) override;
private:
double decayRateGradient = 0.9;
double decayRateGradientNorm = 0.999;
double delta = 1e-7;
std::vector<double> gradientNormCache;
std::vector<double> oldGradientNormCache;
std::vector<double> gradientCache;
std::vector<double> oldGradientCache;
};
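/* For reference: the classic Adam update (Kingma & Ba), which the member names
 * above suggest; this mapping is an assumption, see the accompanying .cpp for
 * the actual implementation. With beta1 = decayRateGradient and
 * beta2 = decayRateGradientNorm, for each parameter i:
 *
 *   gradientCache[i]     = beta1 * gradientCache[i]     + (1 - beta1) * gradient[i];
 *   gradientNormCache[i] = beta2 * gradientNormCache[i] + (1 - beta2) * gradient[i] * gradient[i];
 *   double mHat = gradientCache[i]     / (1 - std::pow(beta1, iteration));
 *   double vHat = gradientNormCache[i] / (1 - std::pow(beta2, iteration));
 *   parameters[i] -= learningRate * mHat / (std::sqrt(vHat) + delta);
 */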
template<typename T>
std::vector<std::vector<T>> getBatches(gsl::span<const T> data,
int batchSize) {
int numBatches = static_cast<int>(std::ceil(static_cast<double>(data.size()) / batchSize));
std::vector<std::vector<T>> batches(numBatches);
int batchIdx = -1;
for (int i = 0; static_cast<typename decltype (data)::index_type>(i) < data.size(); ++i) {
if (i % batchSize == 0) {
++batchIdx;
int remaining = data.size() - i;
batches[batchIdx].resize(std::min(batchSize, remaining));
}
batches[batchIdx][i % batchSize] = data[i];
}
return batches;
}
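/* Example (follows directly from the code above): batches are filled front to
 * back and the last batch holds the remainder.
 *
 *   std::vector<int> data(10);
 *   std::iota(data.begin(), data.end(), 0);      // 0..9
 *   auto batches = parpe::getBatches<int>(data, 4);
 *   // batches.size() == 3; batch sizes are 4, 4 and 2:
 *   // {0,1,2,3}, {4,5,6,7}, {8,9}
 */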
double getVectorNorm(gsl::span<const double> v);
std::vector<double> getVectorDifference(gsl::span<const double> v,
gsl::span<const double> w);
template<typename BATCH_ELEMENT>
class MinibatchOptimizer {
public:
std::tuple<int, double, std::vector<double> > optimize(SummedGradientFunction<BATCH_ELEMENT> const& f,
gsl::span<const BATCH_ELEMENT> data,
gsl::span<const double> initialParameters,
gsl::span<const double> lowerParameterBounds,
gsl::span<const double> upperParameterBounds,
OptimizationReporter *reporter,
Logger *logger_) {
Expects((unsigned) f.numParameters() == initialParameters.size());
Logger logger = logger_ ? *logger_ : Logger();
// We don't change the user inputs but work with copies
std::vector<double> parameters(initialParameters.begin(), initialParameters.end());
std::vector<double> oldParameters(parameters.size(), NAN);
std::vector<BATCH_ELEMENT> shuffledData(data.begin(), data.end());
std::vector<double> gradient(parameters.size(), 0.0);
std::vector<double> oldGradient(parameters.size(), NAN);
std::random_device rd;
std::mt19937 rng(rd());
double cost = NAN;
int iteration = 0;
int subsequentFails = 0;
learningRateUpdater->setMaxEpochs(maxEpochs);
parameterUpdater->initialize(initialParameters.size());
if (reporter) {
reporter->starting(initialParameters);
reporter->result_writer_->setLoggingEachFunctionEvaluation(false, true);
reporter->result_writer_->setLoggingEachIteration(false);
}
for (int epoch = 0; epoch < maxEpochs; ++epoch) {
auto epochLogger = logger.getChild(std::string("e") + std::to_string(epoch));
// Create randomized batches
std::shuffle(shuffledData.begin(), shuffledData.end(), rng);
auto batches = getBatches<BATCH_ELEMENT>(shuffledData, batchSize);
// Update learning rate according to epoch
learningRateUpdater->updateLearningRate(epoch);
for (int batchIdx = 0; (unsigned) batchIdx < batches.size(); ++batchIdx) {
auto batchLogger = epochLogger->getChild(std::string("b") + std::to_string(batchIdx));
iteration++;
auto status = evaluate(f, parameters, batches[batchIdx], cost, gradient, batchLogger.get(), reporter);
// Give some output
learningRate = learningRateUpdater->getCurrentLearningRate();
std::stringstream ss;
ss << ": Cost: " << cost << " |g|2: " << getVectorNorm(gradient) << " Batch: " << batches[batchIdx]
<< " LearningRate: " << learningRate << std::endl;
batchLogger->logmessage(loglevel::debug, ss.str().c_str());
if (status == functionEvaluationFailure) {
// Check whether the interceptor should be used (should always be the case, except for study purposes)
if (interceptor > interceptType::none)
status = rescueInterceptor(parameters, oldParameters, gradient, oldGradient,
lowerParameterBounds, upperParameterBounds, cost, subsequentFails,
iteration, f, batches[batchIdx], batchLogger.get(), reporter);
// If we still have a failure, stop optimization
if (status == functionEvaluationFailure)
return finish(cost, parameters, minibatchExitStatus::invalidNumber, reporter, batchLogger.get());
} else {
// Cost function evaluation was successful, so we can increase the step size
subsequentFails = std::max(subsequentFails - 1, 0);
learningRateUpdater->increaseLearningRate();
// Overwrite old parameters and old gradient, since they won't be needed any more
std::copy(gradient.begin(), gradient.end(), oldGradient.begin());
std::copy(parameters.begin(), parameters.end(), oldParameters.begin());
}
/* Update parameters after successful gradient evaluation */
handleStep(parameters, oldParameters, gradient, lowerParameterBounds, upperParameterBounds,
cost, iteration, f, batches[batchIdx], batchLogger.get(), reporter);
}
// epoch finished, write the values in HDF5-file
if (reporter)
reporter->iterationFinished(parameters, cost, gradient);
if (getVectorNorm(gradient) <= gradientNormThreshold) {
// evaluate on full data set
auto dataSpan = std::vector<BATCH_ELEMENT>(data.cbegin(), data.cend());
evaluate(f, parameters, dataSpan, cost, gradient, epochLogger.get(), reporter);
return finish(cost, parameters, minibatchExitStatus::gradientNormConvergence, reporter,
epochLogger.get());
}
}
// evaluate on full data set
auto dataSpan = std::vector<BATCH_ELEMENT>(data.cbegin(), data.cend());
evaluate(f, parameters, dataSpan, cost, gradient, &logger, reporter);
return finish(cost, parameters, minibatchExitStatus::maxEpochsExceeded, reporter, &logger);
}
FunctionEvaluationStatus evaluate(SummedGradientFunction<BATCH_ELEMENT> const& f,
gsl::span<const double> parameters,
std::vector<BATCH_ELEMENT> datasets,
double &cost,
gsl::span<double> gradient,
Logger *logger,
OptimizationReporter *reporter) const {
if (reporter) {
reporter->beforeCostFunctionCall(parameters);
reporter->logger_->setPrefix(logger->getPrefix());
}
double cpuTime = 0.0;
auto status = f.evaluate(parameters, datasets, cost, gradient, logger, &cpuTime);
// Normalize to batch size
double batchSize = datasets.size();
cost /= batchSize;
for (auto &g : gradient)
g /= batchSize;
if (reporter) {
reporter->cpu_time_iteration_sec_ += cpuTime;
reporter->cpu_time_total_sec_ += cpuTime;
reporter->afterCostFunctionCall(parameters, cost, gradient);
}
return status;
}
std::tuple<int, double, std::vector<double> > finish(double cost,
std::vector<double> const& parameters,
minibatchExitStatus status,
OptimizationReporter *reporter,
Logger *logger) {
if (logger) {
switch (status) {
case minibatchExitStatus::invalidNumber:
logger->logmessage(loglevel::error, "Mini-batch cost function evaluation failed.");
break;
case minibatchExitStatus::gradientNormConvergence:
logger->logmessage(loglevel::info, "Convergence: gradientNormThreshold reached.");
break;
case minibatchExitStatus::maxEpochsExceeded:
logger->logmessage(loglevel::info, "Number of epochs exceeded.");
}
}
if (reporter)
reporter->finished(cost, parameters, (int) status);
return std::tuple<int, double, std::vector<double> >((int) status, cost, parameters);
}
FunctionEvaluationStatus rescueInterceptor(gsl::span<double> parameters,
gsl::span<double> oldParameters,
gsl::span<double> gradient,
gsl::span<double> oldGradient,
gsl::span<const double> lowerParameterBounds,
gsl::span<const double> upperParameterBounds,
double &cost,
int &subsequentFails,
int iteration,
SummedGradientFunction<BATCH_ELEMENT> const& f,
std::vector<BATCH_ELEMENT> datasets,
Logger *logger,
OptimizationReporter *reporter) {
// initialize diagnostic variables
int maxSubsequentFails = 10;
bool finalFail = false;
bool initialFail = false;
cost = NAN;
FunctionEvaluationStatus status = functionEvaluationFailure;
std::vector<double> parDifference(parameters.size(), 0.0);
std::stringstream ss;
if (reporter) {
reporter->beforeCostFunctionCall(parameters);
reporter->logger_->setPrefix(logger->getPrefix());
}
// debug output
if (logger) {
std::vector<double> firstDifference = getVectorDifference(parameters, oldParameters);
std::stringstream first_ss;
first_ss << " Interceptor is active! Former step size: " << getVectorNorm(firstDifference) << std::endl;
logger->logmessage(loglevel::debug, first_ss.str().c_str());
}
// Cost function evaluation failed: We need to intercept
while (status == functionEvaluationFailure) {
// If the objective function evaluation failed, we want to undo the step
++subsequentFails;
parameterUpdater->undoLastStep();
std::copy(oldGradient.begin(), oldGradient.end(), gradient.begin());
std::copy(oldParameters.begin(), oldParameters.end(), parameters.begin());
// note the previous parameter step for debugging
parDifference = getVectorDifference(parameters, oldParameters);
// Check if there are NaNs in the parameter vector now (e.g., fail at first iteration)
if (std::any_of(parameters.begin(), parameters.end(), [](double d) {return std::isnan(d);}))
initialFail = true;
if (subsequentFails >= maxSubsequentFails)
finalFail = true;
// If nothing helps and no cold restart wanted: cancel optimization
if (initialFail || (finalFail && interceptor != interceptType::reduceStepAndRestart)) {
if (logger)
logger->logmessage(loglevel::debug, "Cost function failure could not be recovered. Stopping optimization.");
return functionEvaluationFailure;
}
if (finalFail) {
/* Reducing step size did not work.
* Do a cold restart and take a very small step.
*/
ss << "Final failure. Rescue interceptor could not recover optimization run." << std::endl;
subsequentFails = 0;
parameterUpdater->clearCache();
learningRateUpdater->setReductionFactor(1e-5);
} else {
/* We did not fail too often: we reduce the step size */
learningRateUpdater->reduceLearningRate();
ss << "Failure. Reducing learning rate and retry..." << std::endl;
}
// debug output
ss << ": Interceptor, before new evaluation: " << std::endl
<< " New cost: " << cost
<< ", new |g|2: " << getVectorNorm(gradient)
<< ", new LearningRate: " << learningRate
<< ", real step length: " << getVectorNorm(parDifference)
<< ", Failures: " << subsequentFails
<< std::endl;
// Do the next step
learningRate = learningRateUpdater->getCurrentLearningRate();
parameterUpdater->updateParameters(learningRate, iteration, gradient, parameters, lowerParameterBounds,
upperParameterBounds);
// Re-evaluate the cost function and hope for the best
status = evaluate(f, parameters, datasets, cost, gradient, logger, reporter);
// get Difference for debugging
parDifference = getVectorDifference(parameters, oldParameters);
// debug output
ss << ": Interceptor, after new evaluation: " << std::endl
<< " New Cost: " << cost
<< " new |g|2: " << getVectorNorm(gradient)
<< " new LearningRate: " << learningRate
<< "real step length: " << getVectorNorm(parDifference)
<< std::endl;
if (logger)
logger->logmessage(loglevel::debug, ss.str().c_str());
}
return status;
}
void handleStep(gsl::span<double> parameters,
gsl::span<double> oldParameters,
gsl::span<double> gradient,
gsl::span<const double> lowerParameterBounds,
gsl::span<const double> upperParameterBounds,
double cost,
int iteration,
SummedGradientFunction<BATCH_ELEMENT> const& f,
std::vector<BATCH_ELEMENT> datasets,
Logger *logger,
OptimizationReporter *reporter) {
/* Retrieve step length and try a full step */
double stepLength = learningRateUpdater->getCurrentLearningRate();
parameterUpdater->updateParameters(stepLength, iteration, gradient, parameters,
lowerParameterBounds, upperParameterBounds);
/* If no line search desired: that's it! */
if (lineSearchSteps == 0)
return;
/* Define lambda function for step length evaluation */
std::function<double (double)> evalLineSearch = [&f, &datasets, iteration,
¶meters, &oldParameters, &gradient,
&lowerParameterBounds, &upperParameterBounds,
&logger, &reporter, this](double alpha) {
/* Reset oldParameters and re-update with new step length */
std::copy(oldParameters.begin(), oldParameters.end(), parameters.begin());
parameterUpdater->undoLastStep();
parameterUpdater->updateParameters(alpha, iteration, gradient, parameters,
lowerParameterBounds, upperParameterBounds);
/* Write new cost function value and return */
double newCost = NAN;
evaluate(f, parameters, datasets, newCost, gsl::span<double>(), logger, reporter);
return newCost;
};
/* If no line search is desired, we have already returned above.
* From here on, we check for a decreasing cost function.
*
* The idea:
* cost0 is the cost before the parameter update;
* we want the updated cost to be lower.
*
* First compute a naive step as if there were no line search -> get cost1
* If cost1 > cost0: try to improve -> get cost2
* If also cost2 > cost0:
*     If only a short line search is desired: take min(cost1, cost2)
*     If a longer line search is desired: step into performLineSearch()
* */
double cost1 = evalLineSearch(stepLength);
/* Return on improvement */
if (cost1 <= cost) {
logger->logmessage(loglevel::debug, " Line-Search: Step was good right away...");
return;
}
// Debugging output
std::stringstream line_ss;
std::stringstream parabola_ss;
std::vector<double> parDifference = getVectorDifference(parameters, oldParameters);
/* No improvement: compute update direction */
std::vector<double> direction(parameters.size(), NAN);
for (unsigned int i = 0; i < gradient.size(); ++i)
direction[i] = parameters[i] - oldParameters[i];
double dirNorm = getVectorNorm(direction);
for (unsigned int i = 0; i < gradient.size(); ++i)
direction[i] /= dirNorm;
/* Is the step direction a descent direction? */
double dirGradient = std::inner_product(
direction.begin(), direction.end(), gradient.begin(), 0.0);
if (dirGradient > 0) {
/* No descent direction, no hope for improvement:
* Try to do something smart anyway */
parabola_ss << " Line-Search: Not a descent direction! " << std::endl;
/* Fit a parabola to decide whether a smaller or
* a bigger step seems more promising */
double newStepLength = NAN;
if (cost1 < cost - 2.0 * dirNorm * dirGradient) {
parabola_ss << " Stepping longer now. " << std::endl;
newStepLength = stepLength * 2.0;
} else {
parabola_ss << " Stepping shorter now. " << std::endl;
newStepLength = stepLength / 2.0;
}
/* re-evaluate cost function */
double cost2 = evalLineSearch(newStepLength);
if (cost2 > cost1) {
/* The parabola idea didn't work. Just accept the step as it is */
cost1 = evalLineSearch(stepLength);
parabola_ss << " Step adaptation did not work..." << std::endl;
}
parabola_ss << " cost0: " << cost << " (step length: " << getVectorNorm(parDifference)
<< "), cost1: " << cost1 << " (step length: " << stepLength
<< "), cost2: " << cost2 << " (step length: " << newStepLength
<< ") " << std::endl;
if (logger)
logger->logmessage(loglevel::debug, parabola_ss.str().c_str());
/* We tried all we could */
return;
}
/* Original step was too big, but we're facing a descent direction
* Propose a new step based on a parabolic interpolation */
double newStepLength = -0.5 * dirGradient * std::pow(stepLength, 2.0) /
(cost1 - cost - dirGradient * stepLength);
double cost2 = evalLineSearch(newStepLength);
// Debugging output
if (logger) {
line_ss << " Line-Search: two steps were done, results are: " << std::endl
<< " cost0: " << cost << " (step length: " << getVectorNorm(parDifference)
<< "), cost1: " << cost1 << " (step length: " << stepLength
<< "), cost2: " << cost2 << " (step length: " << newStepLength
<< ")" << std::endl;
logger->logmessage(loglevel::debug, line_ss.str().c_str());
}
/* If we did improve, return, otherwise iterate */
if (cost2 < cost) return;
if (lineSearchSteps < 2) {
/* No more iteration wanted, but 2nd try was better than 1st */
if (cost2 <= cost1) return;
/* 1st try was better than 2nd, use it */
cost1 = evalLineSearch(stepLength);
} else {
/* No descent found and line search option is set: iterate! */
std::stringstream line_ss;
if (logger) {
line_ss << " Line-Search: Need to go to third order approximation, looping... " << std::endl;
logger->logmessage(loglevel::debug, line_ss.str().c_str());
}
performLineSearch(stepLength,
newStepLength,
cost,
cost1,
cost2,
dirGradient,
evalLineSearch);
}
}
void performLineSearch(double alpha1,
double alpha2,
double cost,
double cost1,
double cost2,
double dirGradient,
std::function<double (double)> costFunEvaluate) {
/* From here on, we will use cubic interpolation.
* We need to compute the matrix-vector multiplication
*
* a = 1/tmp_D * [ tmp_11, tmp_12 ] [tmp_v1]
* b = 1/tmp_D * [ tmp_21, tmp_22 ] [tmp_v2]
*
* in order to find the possible minimum at
*
* alpha3 = (-b + sqrt(b*b - 3*a*dirGradient)) / (3*a)
*
* Possibly, we have to iterate this process. */
/* Declare variables which will be needed outside the loop */
double cost3 = NAN;
for (int iStep = 2; iStep <= lineSearchSteps; ++iStep) {
/* declare temporary variables */
double tmp_D = std::pow(alpha2 * alpha1, 2.0) * (alpha2 - alpha1);
double tmp_11 = std::pow(alpha1, 2.0);
double tmp_12 = -1.0 * std::pow(alpha2, 2.0);
double tmp_21 = -1.0 * std::pow(alpha1, 3.0);
double tmp_22 = std::pow(alpha2, 3.0);
double tmp_v1 = cost2 - cost - dirGradient * alpha2;
double tmp_v2 = cost1 - cost - dirGradient * alpha1;
double a = (tmp_11 * tmp_v1 + tmp_12 * tmp_v2) / tmp_D;
double b = (tmp_21 * tmp_v1 + tmp_22 * tmp_v2) / tmp_D;
/* Compute possible new step length */
double alpha3 = (-b + std::sqrt(b*b - 3.0*a*dirGradient)) / (3.0*a);
/* Evaluate line search function at alpha3 */
cost3 = costFunEvaluate(alpha3);
/* If improvement, return */
if (cost3 < cost) return;
/* If no improvement, update values and re-iterate */
if (iStep < lineSearchSteps) {
cost1 = cost2;
cost2 = cost3;
alpha1 = alpha2;
alpha2 = alpha3;
}
}
/* No good step was found, but the max number
* of line search steps was reached */
if (cost3 < std::min(cost1, cost2)) return;
if (cost1 < cost2) {
cost1 = costFunEvaluate(alpha1);
} else {
cost2 = costFunEvaluate(alpha2);
}
}
std::unique_ptr<ParameterUpdater> parameterUpdater = std::make_unique<ParameterUpdaterVanilla>();
// Set some default values
interceptType interceptor = interceptType::reduceStepAndRestart;
int lineSearchSteps = 0;
int batchSize = 1;
int maxEpochs = 1;
double gradientNormThreshold = 0.0;
double learningRate = 0.001;
std::unique_ptr<LearningRateUpdater> learningRateUpdater =
std::make_unique<LearningRateUpdater>(maxEpochs, learningRateInterp::linear);
};
void setMinibatchOption(const std::pair<const std::string, const std::string> &pair,
MinibatchOptimizer<int>* optimizer);
template<typename BATCH_ELEMENT>
std::unique_ptr<MinibatchOptimizer<BATCH_ELEMENT>> getMinibatchOptimizer(OptimizationOptions const& options) {
auto optim = std::make_unique<MinibatchOptimizer<BATCH_ELEMENT>>();
options.for_each<MinibatchOptimizer<BATCH_ELEMENT>*>(setMinibatchOption, optim.get());
return optim;
}
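/* Usage sketch (illustrative): the recognized option keys are defined by
 * setMinibatchOption() in the accompanying .cpp, so the key names below are
 * assumptions, as is the setOption() accessor on OptimizationOptions.
 *
 *   parpe::OptimizationOptions options;
 *   options.setOption("maxEpochs", "10");   // hypothetical key
 *   options.setOption("batchSize", "32");   // hypothetical key
 *   auto optimizer = parpe::getMinibatchOptimizer<int>(options);
 *   // optimizer->optimize(...) can then be called with the summed gradient
 *   // function, the training data and the parameter bounds.
 */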
std::tuple<int, double, std::vector<double> > runMinibatchOptimization(MinibatchOptimizationProblem<int> *problem);
template<typename T>
void clipToBounds(gsl::span<const T> lowerBounds,
gsl::span<const T> upperBounds,
gsl::span<T> x) {
if (lowerBounds.empty() && upperBounds.empty())
return;
Expects(lowerBounds.size() == upperBounds.size());
Expects(lowerBounds.size() == x.size());
for (int i = 0; static_cast<typename gsl::span<const T>::index_type>(i) < x.size(); ++i)
x[i] = std::min(std::max(lowerBounds[i], x[i]), upperBounds[i]);
}
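/* Example (follows directly from the loop above): clipping a parameter vector
 * into box bounds.
 *
 *   std::vector<double> lb{0.0, 0.0}, ub{1.0, 1.0};
 *   std::vector<double> x{-0.5, 2.0};
 *   parpe::clipToBounds<double>(lb, ub, x);
 *   // x is now {0.0, 1.0}
 */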
} // namespace parpe
#endif