Signature Description

// Selects which Box-Cox formula is applied to each value y of the series.
// In the formulas below, λ is the power (lambda) parameter, ^ denotes
// exponentiation, and GM stands for the geometric mean.
enum class box_cox_type : unsigned char  {
    // y(λ) = (y^λ - 1) / λ,  if λ != 0
    // y(λ) = log(y),         if λ == 0
    original = 1,

    // y(λ) = (y^λ - 1) / (λ * GM^(λ - 1)),  if λ != 0
    // y(λ) = GM * log(y),                   if λ == 0
    geometric_mean = 2,

    // y(λ) = sign(y) * (((|y| + 1)^λ - 1) / λ),  if λ != 0
    // y(λ) = sign(y) * log(|y| + 1),             if λ == 0
    modulus = 3,

    // y(λ) = (e^(λy) - 1) / λ,  if λ != 0
    // y(λ) = y,                 if λ == 0
    exponential = 4,
};
Different Box-Cox transformation formulas to be used with BoxCoxVisitor.

Signature Description Parameters
#include <DataFrame/DataFrameStatsVisitors.h>

// Visitor implementing the Box-Cox transformation.
// T: column data type.
// I: index type (defaults to unsigned long).
template<typename T, typename I = unsigned long>
struct BoxCoxVisitor;

// -------------------------------------

// Shorthand alias for BoxCoxVisitor<T, I>.
template<typename T, typename I = unsigned long>
using bcox_v = BoxCoxVisitor<T, I>;
        
This is a “single action visitor”, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This visitor implements the Box-Cox transformation, a power transformation that tries to reshape the data so that it more closely follows a normal distribution. It is not guaranteed to always work.
The most important factor in this transformation is the power factor lambda (λ). Lambda is usually between -5 and 5.
For the original and geometric_mean types, all series values must be positive. If the series contains negative values, you must set the is_all_positive flag to false; the visitor will then shift the series. The shift value is the absolute value of the minimum of the series, plus 0.0000001.
For the other types (modulus and exponential), the series may contain both positive and negative values.

    // bc_type:         which Box-Cox formula to apply (see box_cox_type).
    // lambda:          the power factor of the transformation.
    // is_all_positive: pass false when the series contains negative values
    //                  and bc_type is original or geometric_mean, so that
    //                  the visitor shifts the series before transforming.
    BoxCoxVisitor(box_cox_type bc_type,
                  T lambda,
                  bool is_all_positive);
        
T: Column data type.
I: Index type.
static void test_BoxCoxVisitor()  {

    std::cout << "\nTesting BoxCoxVisitor{ } ..." << std::endl;

    const size_t            item_cnt = 16;
    MyDataFrame             df;
    RandGenParams<double>   p;

    p.mean = 5.6;
    p.std = 0.5;
    p.seed = 123;
    p.min_value = -15;
    p.max_value = 30;

    df.load_data(MyDataFrame::gen_sequence_index(0, item_cnt, 1),
                 std::make_pair("lognormal", gen_lognormal_dist<double>(item_cnt, p)),
                 std::make_pair("normal", gen_normal_dist<double>(item_cnt, p)),
                 std::make_pair("uniform_real", gen_uniform_real_dist<double>(item_cnt, p)));

    BoxCoxVisitor<double>   bc_v1(box_cox_type::original, 1.5, true);
    const auto              &result1 = df.single_act_visit<double>("lognormal", bc_v1).get_result();
    BoxCoxVisitor<double>   bc_v2(box_cox_type::original, 1.5, false);
    const auto              &result2 = df.single_act_visit<double>("uniform_real", bc_v2).get_result();
    BoxCoxVisitor<double>   bc_v3(box_cox_type::modulus, -0.5, false);
    const auto              &result3 = df.single_act_visit<double>("uniform_real", bc_v3).get_result();
    BoxCoxVisitor<double>   bc_v4(box_cox_type::exponential, -0.5, false);
    const auto              &result4 = df.single_act_visit<double>("uniform_real", bc_v4).get_result();

    for(auto citer : result1)
        std::cout << citer << ", ";
    std::cout << std::endl;
    for(auto citer : result2)
        std::cout << citer << ", ";
    std::cout << std::endl;
    for(auto citer : result3)
        std::cout << citer << ", ";
    std::cout << std::endl;
    for(auto citer : result4)
        std::cout << citer << ", ";
    std::cout << std::endl;
}