Skip to content

[7.17][ML] Improve forecasting for time series with step changes (#2591) #2593

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@

//=== Regressions

== {es} version 7.17.16

=== Enhancements

* Improve forecasting for time series with step changes. (See {ml-pull}2591[#2591],
issue: {ml-issue}2466[#2466]).

== {es} version 7.17.13

=== Enhancements
Expand Down
74 changes: 60 additions & 14 deletions include/maths/common/CNaiveBayes.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,19 +154,43 @@ class MATHS_COMMON_EXPORT CNaiveBayesFeatureDensityFromPrior final
TPriorPtr m_Prior;
};

//! \brief Enables using custom feature weights in class prediction.
//!
//! This is a pure interface: implementations are handed log-likelihoods
//! via add() and report a single weight via calculate(). The classifier's
//! prediction methods document that the weight should be in the range
//! [0,1] and that a smaller weight gives the feature less impact on
//! class selection.
class CNaiveBayesFeatureWeight {
public:
//! Virtual so that implementations can be destroyed through this interface.
virtual ~CNaiveBayesFeatureWeight() = default;
//! Supply the log-likelihood \p logLikelihood associated with the class
//! \p class_.
//! NOTE(review): presumably called once per class with the class
//! conditional log-likelihood of the feature value - confirm against the
//! CNaiveBayes implementation.
virtual void add(std::size_t class_, double logLikelihood) = 0;
//! Compute the feature weight.
virtual double calculate() const = 0;
};

//! \brief Implements a Naive Bayes classifier.
class MATHS_COMMON_EXPORT CNaiveBayes {
public:
using TDoubleDoublePr = std::pair<double, double>;
using TDoubleSizePr = std::pair<double, std::size_t>;
using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
using TDoubleSizePrVecDoublePr = std::pair<TDoubleSizePrVec, double>;
using TDouble1Vec = core::CSmallVector<double, 1>;
using TDouble1VecVec = std::vector<TDouble1Vec>;
using TOptionalDouble = boost::optional<double>;
using TFeatureWeightProvider = std::function<CNaiveBayesFeatureWeight&()>;

private:
//! \brief The default feature weight: every feature counts fully in
//! class prediction.
//!
//! Whatever log-likelihoods it is given are ignored and the weight it
//! reports is always one.
class CUnitFeatureWeight : public CNaiveBayesFeatureWeight {
public:
    //! Deliberately does nothing: the weight is independent of the inputs.
    void add(std::size_t /*class_*/, double /*logLikelihood*/) override {
    }

    //! \return One, always.
    double calculate() const override {
        constexpr double UNIT_WEIGHT{1.0};
        return UNIT_WEIGHT;
    }
};

//! \brief Callable which supplies the unit feature weight. It is the
//! default weight provider for the class probability methods.
class CUnitFeatureWeightProvider {
public:
    //! \return A reference to the unit weight object owned by this provider.
    CUnitFeatureWeight& operator()() const {
        return m_UnitWeight;
    }

private:
    //! Mutable so that the const call operator can return a non-const
    //! reference to it.
    mutable CUnitFeatureWeight m_UnitWeight;
};

public:
explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
double decayRate = 0.0,
TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble());
explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate = 0.0);
CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
const SDistributionRestoreParams& params,
core::CStateRestoreTraverser& traverser);
Expand All @@ -184,6 +208,9 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
//! Check if this is initialized, i.e. if any training data has been added.
bool initialized() const;

//! Get the number of classes.
std::size_t numberClasses() const;

//! This can be used to optionally seed the class counts
//! with \p counts. These are added on to data class counts
//! to compute the class posterior probabilities.
Expand All @@ -210,27 +237,53 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
//!
//! \param[in] n The number of class probabilities to estimate.
//! \param[in] x The feature values.
//! \param[in] weightProvider Computes a feature weight from the class
//! conditional log-likelihood of the feature value. It should be in
//! the range [0,1]. The smaller the value the less impact the feature
//! has on class selection.
//! \return The class probabilities and the minimum feature weight.
//! \note \p x size should be equal to the number of features.
//! A missing feature is indicated by passing an empty vector
//! for that feature.
TDoubleSizePrVec highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const;
TDoubleSizePrVecDoublePr highestClassProbabilities(
std::size_t n,
const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;

//! Get the probability of the class labeled \p label for \p x.
//!
//! \param[in] label The label of the class of interest.
//! \param[in] x The feature values.
//! \param[in] weightProvider Computes a feature weight from the class
//! conditional log-likelihood of the feature value. It should be in
//! the range [0,1]. The smaller the value the less impact the feature
//! has on class selection.
//! \return The class probability and the minimum feature weight.
//! \note \p x size should be equal to the number of features.
//! A missing feature is indicated by passing an empty vector
//! for that feature.
double classProbability(std::size_t label, const TDouble1VecVec& x) const;
TDoubleDoublePr classProbability(std::size_t label,
const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider =
CUnitFeatureWeightProvider{}) const;

//! Get the probabilities of all the classes for \p x.
//!
//! \param[in] x The feature values.
//! \param[in] weightProvider Computes a feature weight from the class
//! conditional log-likelihood of the feature value. It should be in
//! the range [0,1]. The smaller the value the less impact the feature
//! has on class selection.
//! \return The class probabilities and the minimum feature weight.
//! A missing feature is indicated by passing an empty vector
//! for that feature.
//! \note \p x size should be equal to the number of features.
//! A missing feature is indicated by passing an empty vector
//! for that feature.
TDoubleSizePrVec classProbabilities(const TDouble1VecVec& x) const;
TDoubleSizePrVecDoublePr
classProbabilities(const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;

//! Debug the memory used by this object.
void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const;
Expand Down Expand Up @@ -298,13 +351,6 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
bool validate(const TDouble1VecVec& x) const;

private:
//! It is not always appropriate to use features with very low
//! probability in all classes to discriminate: the class choice
//! will be very sensitive to the underlying conditional density
//! model. This is a cutoff (for the minimum maximum class log
//! likelihood) in order to use a feature.
TOptionalDouble m_MinMaxLogLikelihoodToUseFeature;

//! Controls the rate at which data are aged out.
double m_DecayRate;

Expand Down
3 changes: 1 addition & 2 deletions lib/core/CStateRestoreTraverser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ namespace core {
CStateRestoreTraverser::CStateRestoreTraverser() : m_BadState(false) {
}

CStateRestoreTraverser::~CStateRestoreTraverser() {
}
CStateRestoreTraverser::~CStateRestoreTraverser() = default;

bool CStateRestoreTraverser::haveBadState() const {
return m_BadState;
Expand Down
Loading