ppforest2 v0.1.0
Projection Pursuit Decision Trees and Random Forests
Loading...
Searching...
No Matches
Stats.hpp
Go to the documentation of this file.
1#pragma once
2
4#include "utils/Types.hpp"
5
6#include <algorithm>
7#include <cmath>
8#include <map>
9#include <set>
10#include <stdexcept>
11#include <vector>
12#include <Eigen/Dense>
13#include <pcg_random.hpp>
14
23namespace ppforest2::stats {
24 using RNG = pcg32;
25
26
33 template<typename Y> void sort(types::FeatureMatrix& x, Y& y) {
34 std::vector<int> indices = utils::range_vector(x.rows());
35
36 std::stable_sort(indices.begin(), indices.end(), [&y](int i, int j) { return y(i) < y(j); });
37
38 x = x(indices, Eigen::all).eval();
39 y = y(indices, Eigen::all).eval();
40 }
41
/**
 * @brief Unique group labels in a response vector.
 *
 * @param column Column vector of internal group labels.
 * @return The distinct labels of `column` as a `std::set`, i.e. without
 *         duplicates and in the set's sorted iteration order.
 */
48 std::set<types::GroupId> unique(types::GroupIdVector const& column);
49
56 template<typename Derived> double var(Eigen::MatrixBase<Derived> const& data) {
57 static_assert(
58 Derived::ColsAtCompileTime == 1 || Derived::ColsAtCompileTime == Eigen::Dynamic,
59 "var: expected a vector (single column)"
60 );
61
62 if (data.rows() == 0) {
63 throw std::invalid_argument("var: data must have at least one row");
64 }
65
66 if (data.rows() == 1) {
67 return 0.0;
68 }
69
70 double const mean = static_cast<double>(data.mean());
71 return (data.array().template cast<double>() - mean).square().sum() / static_cast<double>(data.rows() - 1);
72 }
73
75 template<typename Derived> double sd(Eigen::MatrixBase<Derived> const& data) {
76 return std::sqrt(var(data));
77 }
78
86
89
/**
 * @brief Majority vote over a sequence of integer-coded class labels.
 *
 * @param preds Predicted labels, stored as `types::Outcome` values.
 * @return The winning label as a `types::Outcome`.
 *
 * @note NOTE(review): tie-breaking and the behavior for an empty `preds` are
 *       decided by the out-of-line definition, which is not visible here —
 *       confirm before relying on either.
 */
101 types::Outcome majority_vote(std::vector<types::Outcome> const& preds);
102
/**
 * @brief Arithmetic mean of a sequence of outcome values.
 *
 * @param preds Outcome values to average.
 * @return The mean of `preds` as a `types::Outcome`.
 *
 * @note NOTE(review): the behavior for an empty `preds` is decided by the
 *       out-of-line definition, which is not visible here — confirm.
 */
112 types::Outcome mean(std::vector<types::Outcome> const& preds);
113
/**
 * @brief Map each label in `groups` to its index in iteration order.
 *
 * @param groups Set of group labels.
 * @return A map from each label to its position when iterating `groups`.
 *         NOTE(review): presumably indices start at 0 — confirm against the
 *         definition.
 */
122 std::map<types::GroupId, int> group_indices(std::set<types::GroupId> const& groups);
123}
Statistical infrastructure for training and evaluation.
Definition ConfusionMatrix.hpp:11
void sort(types::FeatureMatrix &x, Y &y)
Sort a feature matrix and a response vector by the response values.
Definition Stats.hpp:33
double var(Eigen::MatrixBase< Derived > const &data)
Sample variance of a vector (unbiased, n-1 denominator).
Definition Stats.hpp:56
double sd(Eigen::MatrixBase< Derived > const &data)
Sample standard deviation of a vector — sqrt(var(data)).
Definition Stats.hpp:75
std::set< types::GroupId > unique(types::GroupIdVector const &column)
Unique group labels in a response vector.
pcg32 RNG
Definition Stats.hpp:24
types::Outcome majority_vote(std::vector< types::Outcome > const &preds)
Majority vote over a sequence of integer-coded class labels.
std::map< types::GroupId, int > group_indices(std::set< types::GroupId > const &groups)
Map each label in groups to its index in iteration order.
types::Outcome mean(std::vector< types::Outcome > const &preds)
Arithmetic mean of a sequence of outcome values.
Eigen::Matrix< Feature, Eigen::Dynamic, Eigen::Dynamic > FeatureMatrix
Dynamic-size matrix of feature values.
Definition Types.hpp:33
Eigen::Matrix< GroupId, Eigen::Dynamic, 1 > GroupIdVector
Dynamic-size column vector of internal group labels.
Definition Types.hpp:39
Eigen::Matrix< Feature, Eigen::Dynamic, 1 > FeatureVector
Dynamic-size column vector of feature values.
Definition Types.hpp:36
Feature Outcome
Scalar type for predictions (float for both classification and regression).
Definition Types.hpp:30
std::vector< int > range_vector(Size n)
Build the sequence [0, 1, ..., n - 1] as std::vector<int>.
Definition RangeVector.hpp:26