Binarization strategies for multiclass-to-binary reduction. More...

Namespaces
namespace	binarize

namespace	cli
	Command-line interface: argument parsing, subcommands, and benchmark/evaluation orchestration.

namespace	cutpoint

namespace	grouping

namespace	io

namespace	leaf
	Leaf creation strategies.

namespace	math
	Numeric comparison utilities.

namespace	pp

namespace	serialization
	JSON serialization and deserialization for ppforest2 models.

namespace	stats
	Statistical infrastructure for training and evaluation.

namespace	stop

namespace	strategies

namespace	sys
	System-level utilities (process memory measurement).

namespace	test

namespace	types
	Core numeric type aliases for the ppforest2 library.

namespace	utils
	Utility functions for std::map manipulation.

namespace	vars

namespace	viz

Classes
struct	Bagged
	Bootstrap-aggregated model wrapper. More...

class	ClassificationForest
	Random forest of classification trees. More...

class	ClassificationTree
	A projection pursuit decision tree for classification. More...

class	Forest
	Abstract base class for projection pursuit random forests. More...

class	JsonReader
	A small DSL for extracting and validating values from a JSON object, with path-aware error messages. More...

class	Model
	Abstract base class for predictive models (trees and forests). More...

struct	NodeContext
	Mutable context accumulating intermediate results during node training. More...

struct	Proportions
	Tag type for requesting vote-proportion predictions. More...

class	RegressionForest
	Random forest of regression trees. More...

class	RegressionTree
	A projection pursuit decision tree for regression. More...

class	TrainingSpec
	Training configuration for projection pursuit trees and forests. More...

class	Tree
	Abstract base class for projection pursuit decision trees. More...

class	TreeBranch
	Internal split node in a projection pursuit tree. More...

class	TreeLeaf
	Leaf node in a projection pursuit tree. More...

class	TreeNode
	Abstract base class for nodes in a projection pursuit tree. More...

class	UserError
	Exception for user-facing input validation errors. More...

struct	VariableImportance
	Grouped result of the variable importance measures. More...

class	VIVisitor
	Visitor that accumulates per-variable contributions for VI2 and VI3. More...

Typedefs
using	BaggedTree = Bagged<Tree>
	Alias for the dominant `Bagged` instantiation in this codebase — a bootstrap-aggregated `Tree`. Inner tree is polymorphic (classification or regression via the `Tree` base); the wrapper itself is mode-agnostic.

Functions
double	error (Model const &model, types::FeatureMatrix const &x, types::OutcomeVector const &y)
	Prediction error of `model` on data `(x, y)`.

template<typename T>
double	error (std::unique_ptr< T > const &m, types::FeatureMatrix const &x, types::OutcomeVector const &y)

template<typename M, typename F>
void	for_each_bag_with_oob (std::vector< std::unique_ptr< Bagged< M > > > const &bags, int n_total, F fn)
	Apply `fn` to every bag in `bags` that has usable OOB rows.

bool	is_classification (Model const &model)
	Whether `model` was trained for classification.

bool	is_classification (Model const *model)
	Pointer overload — null-safe; returns `false` for a null model.

bool	is_classification (TrainingSpec const &spec)
	Whether `spec` describes a classification training run.

bool	is_classification (TrainingSpec::Ptr const &spec)
	Pointer overload — null-safe; returns `false` for a null spec.

bool	is_leaf (TreeNode const &node)
	Whether `node` is a `TreeLeaf`.

bool	is_leaf (TreeNode::Ptr const &node)

bool	is_regression (Model const &model)
	Whether `model` was trained for regression.

bool	is_regression (Model const *model)
	Pointer overload — null-safe; returns `false` for a null model.

bool	is_regression (TrainingSpec const &spec)
	Whether `spec` describes a regression training run.

bool	is_regression (TrainingSpec::Ptr const &spec)
	Pointer overload — null-safe; returns `false` for a null spec.

std::optional< double >	oob_error (Forest const &forest, types::FeatureMatrix const &x, types::GroupIdVector const &y)
	Convenience overload — accepts integer class labels for classification.

std::optional< double >	oob_error (Forest const &forest, types::FeatureMatrix const &x, types::OutcomeVector const &y)
	Out-of-bag error.

template<typename T>
std::optional< double >	oob_error (std::unique_ptr< T > const &m, types::FeatureMatrix const &x, types::GroupIdVector const &y)

template<typename T>
std::optional< double >	oob_error (std::unique_ptr< T > const &m, types::FeatureMatrix const &x, types::OutcomeVector const &y)

stats::ClassificationMetrics::Maybe	oob_metrics (ClassificationForest const &forest, types::FeatureMatrix const &x, types::OutcomeVector const &y)
	Out-of-bag metrics — sentinel-free summary of OOB performance.

stats::RegressionMetrics::Maybe	oob_metrics (RegressionForest const &forest, types::FeatureMatrix const &x, types::OutcomeVector const &y)

types::OutcomeVector	oob_predict (Forest const &forest, types::FeatureMatrix const &x)
	Out-of-bag predictions.

template<typename T>
types::OutcomeVector	oob_predict (std::unique_ptr< T > const &m, types::FeatureMatrix const &x)

types::FeatureMatrix	predict_proportions (Model const &model, types::FeatureMatrix const &x)
	Compute vote proportions for a classification model.

types::FeatureVector	predict_proportions (Model const &model, types::FeatureVector const &x)
	Single-row vote proportions for a classification model.

void	user_error (bool condition, char const *message)
	Throw a UserError if the condition is false.

void	user_error (bool condition, std::string const &message)
	Throw a UserError if the condition is false.

VariableImportance	variable_importance (Forest const &forest, types::FeatureMatrix const &x, types::OutcomeVector const &y, int seed)
	Bundle all three VI measures for a forest.

template<typename T>
VariableImportance	variable_importance (std::unique_ptr< T > const &m, types::FeatureMatrix const &x)

template<typename T>
VariableImportance	variable_importance (std::unique_ptr< T > const &m, types::FeatureMatrix const &x, types::OutcomeVector const &y, int seed)

VariableImportance	variable_importance (Tree const &tree, types::FeatureMatrix const &x)
	Bundle the available VI measures for a single tree (VI2 only).

types::FeatureVector	vi_permuted (Forest const &forest, types::FeatureMatrix const &x, types::OutcomeVector const &y, int seed)
	VI1 — per-variable permuted importance.

template<typename T>
types::FeatureVector	vi_permuted (std::unique_ptr< T > const &m, types::FeatureMatrix const &x, types::OutcomeVector const &y, int seed)

types::FeatureVector	vi_projections (Forest const &forest, int n_vars, types::FeatureVector const *scale=nullptr)
	VI2 for a forest — averaged over non-degenerate trees.

template<typename T>
types::FeatureVector	vi_projections (std::unique_ptr< T > const &m, int n_vars, types::FeatureVector const *scale=nullptr)

types::FeatureVector	vi_projections (Tree const &tree, int n_vars, types::FeatureVector const *scale=nullptr)
	VI2 for a single tree — projection-coefficient importance.

types::FeatureVector	vi_weighted_projections (Forest const &forest, types::FeatureMatrix const &x, types::OutcomeVector const &y, types::FeatureVector const *scale=nullptr)
	VI3 — weighted projection-coefficient importance.

template<typename T>
types::FeatureVector	vi_weighted_projections (std::unique_ptr< T > const &m, types::FeatureMatrix const &x, types::OutcomeVector const &y, types::FeatureVector const *scale=nullptr)

Detailed Description

Binarization strategies for multiclass-to-binary reduction.

Variable selection strategies.

Stop rule strategies that determine when to create leaf nodes.

Projection pursuit strategies.

Grouping strategies that manage group partitions throughout training.

Cutpoint strategies for computing decision cutpoints.

When a node has more than 2 groups, a binarization strategy reduces it to a binary problem. The built-in LargestGap finds the largest gap between sorted projected group means. Future strategies (e.g. closest-pair from da Silva Extension I) can be plugged in.

The cutpoint strategies comprise the abstract Cutpoint interface and concrete implementations that determine the split cutpoint in projected space. The built-in MeanOfMeans uses the midpoint of the two group means.

The Grouping strategy owns the full lifecycle of GroupPartitions: initial construction from training labels (init) and per-node child splitting (split).

For classification, ByLabel constructs from sorted labels and routes groups to children via the binary mapping. For regression (future), ByCutpoint quantile-slices the continuous response and re-clusters children at each node.

The projection pursuit strategies comprise the abstract ProjectionPursuit interface and concrete implementations (e.g. PDA) that define how to evaluate and optimise a projection index for separating groups.

A stop rule controls tree growth by deciding when a node should stop splitting and become a leaf. The built-in PureNode stops when only one group remains. Future strategies may add max-depth or min-samples rules.

The variable selection strategies comprise the abstract VariableSelection interface and concrete implementations that select a subset of variables before projection pursuit optimisation. The All strategy uses every variable (single trees); the Uniform strategy samples variables uniformly at random (forests).

Typedef Documentation

◆ BaggedTree

using ppforest2::BaggedTree = Bagged<Tree>

Alias for the dominant Bagged instantiation in this codebase — a bootstrap-aggregated Tree. Inner tree is polymorphic (classification or regression via the Tree base); the wrapper itself is mode-agnostic.

Function Documentation

◆ error() [1/2]

double ppforest2::error	(	Model const &	model,
		types::FeatureMatrix const &	x,
		types::OutcomeVector const &	y )

Prediction error of model on data (x, y).

Classification: misclassification rate. Regression: mean squared error. Mode dispatch goes through Model::Visitor so callers don't need to know whether model is classification or regression. Throws UserError for unknown modes.

◆ error() [2/2]

template<typename T>

double ppforest2::error	(	std::unique_ptr< T > const &	m,
		types::FeatureMatrix const &	x,
		types::OutcomeVector const &	y )

◆ for_each_bag_with_oob()

template<typename M, typename F>

void ppforest2::for_each_bag_with_oob	(	std::vector< std::unique_ptr< Bagged< M > > > const &	bags,
		int	n_total,
		F	fn )

Apply fn to every bag in bags that has usable OOB rows.

Skips bags whose bootstrap sample happened to cover every training row (no OOB to evaluate on). Bags whose degenerate() is true are not filtered: that flag propagates from any descendant leaf, so a tree with one deep aborted split still has valid upper branches and a meaningful predict(). OOB-based callers (VI permuted/weighted) down-weight or zero out fully degenerate trees naturally via their score function (e.g. NMSE clipping for regression).

The callback receives (bag, oob_idx, k) where k is the bag's position in the container — needed by callers that derive a per-bag RNG seed from seed ^ k. Iteration order matches the container's order. Use return; inside fn to skip the rest of the body for the current bag (the lambda's return acts as continue).

◆ is_classification() [1/4]

bool ppforest2::is_classification ( Model const & model )

inline

Whether model was trained for classification.

Encapsulates the training_spec null-check that callers would otherwise repeat. Returns false for an unconfigured model.

◆ is_classification() [2/4]

bool ppforest2::is_classification ( Model const * model )

inline

Pointer overload — null-safe; returns false for a null model.

◆ is_classification() [3/4]

bool ppforest2::is_classification ( TrainingSpec const & spec )

inline

Whether spec describes a classification training run.

◆ is_classification() [4/4]

bool ppforest2::is_classification ( TrainingSpec::Ptr const & spec )

inline

Pointer overload — null-safe; returns false for a null spec.

◆ is_leaf() [1/2]

bool ppforest2::is_leaf ( TreeNode const & node )

Whether node is a TreeLeaf.

Routes through TreeNode::Visitor rather than a virtual method on TreeNode itself — keeps the base class focused on data/traversal and matches the visitor-based dispatch used elsewhere in the codebase. The Ptr overload is a thin dereferencing wrapper.

◆ is_leaf() [2/2]

bool ppforest2::is_leaf ( TreeNode::Ptr const & node )

◆ is_regression() [1/4]

bool ppforest2::is_regression ( Model const & model )

inline

Whether model was trained for regression.

Encapsulates the training_spec null-check that callers would otherwise repeat. Returns false for an unconfigured model.

◆ is_regression() [2/4]

bool ppforest2::is_regression ( Model const * model )

inline

Pointer overload — null-safe; returns false for a null model.

◆ is_regression() [3/4]

bool ppforest2::is_regression ( TrainingSpec const & spec )

inline

Whether spec describes a regression training run.

◆ is_regression() [4/4]

bool ppforest2::is_regression ( TrainingSpec::Ptr const & spec )

inline

Pointer overload — null-safe; returns false for a null spec.

◆ oob_error() [1/4]

std::optional< double > ppforest2::oob_error	(	Forest const &	forest,
		types::FeatureMatrix const &	x,
		types::GroupIdVector const &	y )

Convenience overload — accepts integer class labels for classification.

◆ oob_error() [2/4]

std::optional< double > ppforest2::oob_error	(	Forest const &	forest,
		types::FeatureMatrix const &	x,
		types::OutcomeVector const &	y )

Out-of-bag error.

Classification: misclassification rate. Regression: mean squared error. Returns std::nullopt if no observation has any OOB tree. Throws UserError for unknown modes.

◆ oob_error() [3/4]

template<typename T>

std::optional< double > ppforest2::oob_error	(	std::unique_ptr< T > const &	m,
		types::FeatureMatrix const &	x,
		types::GroupIdVector const &	y )

◆ oob_error() [4/4]

template<typename T>

std::optional< double > ppforest2::oob_error	(	std::unique_ptr< T > const &	m,
		types::FeatureMatrix const &	x,
		types::OutcomeVector const &	y )

◆ oob_metrics() [1/2]

stats::ClassificationMetrics::Maybe ppforest2::oob_metrics	(	ClassificationForest const &	forest,
		types::FeatureMatrix const &	x,
		types::OutcomeVector const &	y )

Out-of-bag metrics — sentinel-free summary of OOB performance.

Mode-specific overloads: classification returns ClassificationMetrics (confusion matrix + error rate), regression returns RegressionMetrics (MSE / MAE / R²). Both return std::nullopt when no observation has any OOB tree (which only happens for empty forests in practice). The internal "no OOB" sentinel never leaks to the caller — these functions own the sentinel-filter logic.

◆ oob_metrics() [2/2]

stats::RegressionMetrics::Maybe ppforest2::oob_metrics	(	RegressionForest const &	forest,
		types::FeatureMatrix const &	x,
		types::OutcomeVector const &	y )

◆ oob_predict() [1/2]

types::OutcomeVector ppforest2::oob_predict	(	Forest const &	forest,
		types::FeatureMatrix const &	x )

Out-of-bag predictions.

Classification: majority-vote labels. Regression: mean of OOB tree predictions. Rows that no tree left out-of-bag are filled with NaN (the same sentinel for both modes — see oob_predict's implementation comment for the rationale). Throws UserError for unknown modes.

Prefer oob_metrics when you want diagnostics — it returns the filtered metric directly with no sentinel exposed to the caller.

◆ oob_predict() [2/2]

template<typename T>

types::OutcomeVector ppforest2::oob_predict	(	std::unique_ptr< T > const &	m,
		types::FeatureMatrix const &	x )

◆ predict_proportions() [1/2]

types::FeatureMatrix ppforest2::predict_proportions	(	Model const &	model,
		types::FeatureMatrix const &	x )

Compute vote proportions for a classification model.

Routes through Model::Visitor to ClassificationTree::predict(x, Proportions{}) or ClassificationForest::predict(x, Proportions{}). Throws UserError if the model is a regression tree or forest. Centralises the visitor boilerplate that the CLI, R bindings, and golden-gen would otherwise each repeat.

◆ predict_proportions() [2/2]

types::FeatureVector ppforest2::predict_proportions	(	Model const &	model,
		types::FeatureVector const &	x )

Single-row vote proportions for a classification model.

◆ user_error() [1/2]

void ppforest2::user_error	(	bool	condition,
		char const *	message )

Throw a UserError if the condition is false.

Parameters

condition	Condition that must hold for valid input.
message	Actionable error message for the user.

◆ user_error() [2/2]

void ppforest2::user_error	(	bool	condition,
		std::string const &	message )

Throw a UserError if the condition is false.

Parameters

condition	Condition that must hold for valid input.
message	Actionable error message for the user.

◆ variable_importance() [1/4]

VariableImportance ppforest2::variable_importance	(	Forest const &	forest,
		types::FeatureMatrix const &	x,
		types::OutcomeVector const &	y,
		int	seed )

Bundle all three VI measures for a forest.

◆ variable_importance() [2/4]

template<typename T>

VariableImportance ppforest2::variable_importance	(	std::unique_ptr< T > const &	m,
		types::FeatureMatrix const &	x )

◆ variable_importance() [3/4]

template<typename T>

VariableImportance ppforest2::variable_importance	(	std::unique_ptr< T > const &	m,
		types::FeatureMatrix const &	x,
		types::OutcomeVector const &	y,
		int	seed )

◆ variable_importance() [4/4]

VariableImportance ppforest2::variable_importance	(	Tree const &	tree,
		types::FeatureMatrix const &	x )

Bundle the available VI measures for a single tree (VI2 only).

◆ vi_permuted() [1/2]

types::FeatureVector ppforest2::vi_permuted	(	Forest const &	forest,
		types::FeatureMatrix const &	x,
		types::OutcomeVector const &	y,
		int	seed )

VI1 — per-variable permuted importance.

Classification: accuracy drop on permuted OOB rows. Regression: NMSE increase. Throws UserError if the forest's mode is not recognised.

◆ vi_permuted() [2/2]

template<typename T>

types::FeatureVector ppforest2::vi_permuted	(	std::unique_ptr< T > const &	m,
		types::FeatureMatrix const &	x,
		types::OutcomeVector const &	y,
		int	seed )

◆ vi_projections() [1/3]

types::FeatureVector ppforest2::vi_projections	(	Forest const &	forest,
		int	n_vars,
		types::FeatureVector const *	scale = nullptr )

VI2 for a forest — averaged over non-degenerate trees.

◆ vi_projections() [2/3]

template<typename T>

types::FeatureVector ppforest2::vi_projections	(	std::unique_ptr< T > const &	m,
		int	n_vars,
		types::FeatureVector const *	scale = nullptr )

◆ vi_projections() [3/3]

types::FeatureVector ppforest2::vi_projections	(	Tree const &	tree,
		int	n_vars,
		types::FeatureVector const *	scale = nullptr )

VI2 for a single tree — projection-coefficient importance.

Mode-agnostic (depends only on the tree's projector geometry).

◆ vi_weighted_projections() [1/2]

types::FeatureVector ppforest2::vi_weighted_projections	(	Forest const &	forest,
		types::FeatureMatrix const &	x,
		types::OutcomeVector const &	y,
		types::FeatureVector const *	scale = nullptr )

VI3 — weighted projection-coefficient importance.

Each tree's contribution is weighted by a per-tree OOB quality score (mode-specific). Throws UserError for unknown forest modes.

◆ vi_weighted_projections() [2/2]

template<typename T>

types::FeatureVector ppforest2::vi_weighted_projections	(	std::unique_ptr< T > const &	m,
		types::FeatureMatrix const &	x,
		types::OutcomeVector const &	y,
		types::FeatureVector const *	scale = nullptr )

Namespaces

Classes

Typedefs

Functions

Detailed Description

Typedef Documentation

◆ BaggedTree

Function Documentation

◆ error() [1/2]

◆ error() [2/2]

◆ for_each_bag_with_oob()

◆ is_classification() [1/4]

◆ is_classification() [2/4]

◆ is_classification() [3/4]

◆ is_classification() [4/4]

◆ is_leaf() [1/2]

◆ is_leaf() [2/2]

◆ is_regression() [1/4]

◆ is_regression() [2/4]

◆ is_regression() [3/4]

◆ is_regression() [4/4]

◆ oob_error() [1/4]

◆ oob_error() [2/4]

◆ oob_error() [3/4]

◆ oob_error() [4/4]

◆ oob_metrics() [1/2]

◆ oob_metrics() [2/2]

◆ oob_predict() [1/2]

◆ oob_predict() [2/2]

◆ predict_proportions() [1/2]

◆ predict_proportions() [2/2]

◆ user_error() [1/2]

◆ user_error() [2/2]

◆ variable_importance() [1/4]

◆ variable_importance() [2/4]

◆ variable_importance() [3/4]

◆ variable_importance() [4/4]

◆ vi_permuted() [1/2]

◆ vi_permuted() [2/2]

◆ vi_projections() [1/3]

◆ vi_projections() [2/3]

◆ vi_projections() [3/3]

◆ vi_weighted_projections() [1/2]

◆ vi_weighted_projections() [2/2]