13#include <pcg_random.hpp>
36 std::stable_sort(indices.begin(), indices.end(), [&y](
int i,
int j) { return y(i) < y(j); });
38 x = x(indices, Eigen::all).eval();
39 y = y(indices, Eigen::all).eval();
56 template<
typename Derived>
double var(Eigen::MatrixBase<Derived>
const& data) {
58 Derived::ColsAtCompileTime == 1 || Derived::ColsAtCompileTime == Eigen::Dynamic,
59 "var: expected a vector (single column)"
62 if (data.rows() == 0) {
63 throw std::invalid_argument(
"var: data must have at least one row");
66 if (data.rows() == 1) {
70 double const mean =
static_cast<double>(data.mean());
71 return (data.array().template cast<double>() -
mean).square().sum() /
static_cast<double>(data.rows() - 1);
75 template<
typename Derived>
double sd(Eigen::MatrixBase<Derived>
const& data) {
76 return std::sqrt(
var(data));
/// @brief Map each label in @p groups to its index in iteration order.
/// @param groups Set of group labels to index.
/// @return Map from each group label to its integer index.
122 std::map<types::GroupId, int>
group_indices(std::set<types::GroupId>
const& groups);
Statistical infrastructure for training and evaluation.
Definition ConfusionMatrix.hpp:11
void sort(types::FeatureMatrix &x, Y &y)
Sort a feature matrix and a response vector by the response values.
Definition Stats.hpp:33
double var(Eigen::MatrixBase< Derived > const &data)
Sample variance of a vector (unbiased, n-1 denominator).
Definition Stats.hpp:56
double sd(Eigen::MatrixBase< Derived > const &data)
Sample standard deviation of a vector — sqrt(var(data)).
Definition Stats.hpp:75
std::set< types::GroupId > unique(types::GroupIdVector const &column)
Unique group labels in a response vector.
pcg32 RNG
Definition Stats.hpp:24
types::Outcome majority_vote(std::vector< types::Outcome > const &preds)
Majority vote over a sequence of integer-coded class labels.
std::map< types::GroupId, int > group_indices(std::set< types::GroupId > const &groups)
Map each label in groups to its index in iteration order.
types::Outcome mean(std::vector< types::Outcome > const &preds)
Arithmetic mean of a sequence of outcome values.
Eigen::Matrix< Feature, Eigen::Dynamic, Eigen::Dynamic > FeatureMatrix
Dynamic-size matrix of feature values.
Definition Types.hpp:33
Eigen::Matrix< GroupId, Eigen::Dynamic, 1 > GroupIdVector
Dynamic-size column vector of internal group labels.
Definition Types.hpp:39
Eigen::Matrix< Feature, Eigen::Dynamic, 1 > FeatureVector
Dynamic-size column vector of feature values.
Definition Types.hpp:36
Feature Outcome
Scalar type for predictions (float for both classification and regression).
Definition Types.hpp:30
std::vector< int > range_vector(Size n)
Build the sequence [0, 1, ..., n - 1] as std::vector<int>.
Definition RangeVector.hpp:26