Signature Description Parameters
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct PolyFitVisitor;

// -------------------------------------

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
using pfit_v = PolyFitVisitor<T, I, A>;
        
This is a “single action visitor”, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This functor fits an N-degree polynomial through the given x-y coordinates by way of least squares and Gaussian elimination.

The result gives you the vector of coefficients.
There is also a method get_y_fits() which returns the vector of y fits for each given x
There is also a method get_slope() which returns the first (0-degree) coefficient.
There is also a method get_residual() which returns the sum of weighted squared residuals.
    using weight_func = std::function<T (const I &idx, size_t val_index)>;

    explicit
    PolyFitVisitor(std::size_t degree,
                   weight_func w_func = [](const I &, size_t) -> T { return (T(1)); });
		
degree: The polynomial degree
w_func: A functor that provides weights to be applied to sigma values. w_func is passed two parameters: (1) The value of the index column corresponding to the given y value, (2) The corresponding index into the y vector. The default is no weights.
T: Column data type.
I: Index type.
A: Memory alignment boundary for vectors. Default is system default alignment
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct LogFitVisitor;

// -------------------------------------

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
using lfit_v = LogFitVisitor<T, I, A>;
        
This is a “single action visitor”, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This functor fits y = b0 + b1 * log(x) through the given x-y coordinates by calling PolyFit (above) on log(x).

The result gives you the vector of two coefficients.
There is also a method get_y_fits() which returns the vector of y fits for each given x
There is also a method get_slope() which returns the first (0-degree) coefficient.
There is also a method get_residual() which returns the sum of weighted squared residuals.
    using weight_func = std::function<T (const I &idx, size_t val_index)>;

    explicit
    LogFitVisitor(weight_func w_func = [](const I &, size_t) -> T { return (T(1)); });
		
w_func: A functor that provides weights to be applied to sigma values. w_func is passed two parameters: (1) The value of the index column corresponding to the given y value, (2) The corresponding index into the y vector. The default is no weights.
T: Column data type.
I: Index type.
A: Memory alignment boundary for vectors. Default is system default alignment
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct ExponentialFitVisitor;

// -------------------------------------

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
using efit_v = ExponentialFitVisitor<T, I, A>;
        
This is a “single action visitor”, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

The result gives you the vector of y fits for each given x.
There is also a method get_slope() which returns the slope -- coefficient of the exponential function.
There is also a method get_residual() which returns the sum of squared residuals.
There is also a method get_intercept() which returns the intercept.
    ExponentialFitVisitor();
		
T: Column data type.
I: Index type.
A: Memory alignment boundary for vectors. Default is system default alignment
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct LinearFitVisitor;

// -------------------------------------

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
using linfit_v = LinearFitVisitor<T, I, A>;
        
This is a “single action visitor”, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

The result gives you the vector of y fits for each given x.
There is also a method get_slope() which returns the slope -- coefficient of the linear function.
There is also a method get_residual() which returns the sum of squared residuals.
There is also a method get_intercept() which returns the intercept.
    LinearFitVisitor();
		
T: Column data type.
I: Index type.
A: Memory alignment boundary for vectors. Default is system default alignment
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct CubicSplineFitVisitor;

// -------------------------------------

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
using csfit_v = CubicSplineFitVisitor<T, I, A>;
        
This is a “single action visitor”, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This functor uses the cubic spline method to fit y into the x-y coordinates specified by the input. It is best described here.
$y_i(x) = a_i + b_i (x - x_i) + c_i (x - x_i)^2 + d_i (x - x_i)^3$
a is just the input y vector

The result gives you the b vector of coefficients.
There is also a method get_c_vec() which returns the c vector of coefficients.
There is also a method get_d_vec() which returns the d vector of coefficients.
    CubicSplineFitVisitor();
		
T: Column data type.
I: Index type.
A: Memory alignment boundary for vectors. Default is system default alignment
// Exercises PolyFitVisitor through single_act_visit():
//   - an unweighted degree-2 fit of Y1 on X1 (coefficients, y fits, residual),
//   - a weighted degree-2 fit of Y1 on X1 (coefficients, residual),
//   - an unweighted degree-3 fit of Y2 on X2 (coefficients).
// All expected numbers are precomputed reference values.
static void test_PolyFitVisitor()  {

    std::cout << "\nTesting PolyFitVisitor{  } ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466,
          123467, 123468, 123469, 123470, 123471, 123472, 123473, 123467, 123468, 123469, 123470, 123471, 123472, 123473,
          123467, 123468, 123469, 123470, 123471, 123472, 123473,
        };
    MyDataFrame                 df;

    df.load_index(std::move(idx));
    df.load_column<double>("X1", { 1, 2, 3, 4, 5 }, nan_policy::dont_pad_with_nans);
    df.load_column<double>("Y1", { 6, 7, 8, 9, 3 }, nan_policy::dont_pad_with_nans);
    df.load_column<double>("X2", { 0.0, 1.0, 2.0, 3.0,  4.0,  5.0 }, nan_policy::dont_pad_with_nans);
    df.load_column<double>("Y2", { 0.0, 0.8, 0.9, 0.1, -0.8, -1.0 }, nan_policy::dont_pad_with_nans);

    PolyFitVisitor<double>  poly_v1 (2);
    PolyFitVisitor<double>  poly_v12 (
        2,
        // The first parameter is the index value; it is declared with the
        // same type as the index loaded above (unsigned long) so the call
        // does not go through an implicit narrowing conversion.
        [](const unsigned long &, std::size_t i) -> double {
            // One weight per data point of the 5-element X1/Y1 columns.
            // static constexpr: the table is not rebuilt on every call.
            static constexpr std::array<double, 5>  weights = { 0.1, 0.8, 0.3, 0.5, 0.2 };

            return (weights[i]);
        });
    auto                    result1 = df.single_act_visit<double, double>("X1", "Y1", poly_v1).get_result();
    auto                    result12 = df.single_act_visit<double, double>("X1", "Y1", poly_v12).get_result();
    auto                    actual1 = std::vector<double> { 0.8, 5.6, -1 };
    auto                    actual1_y = std::vector<double> { 5.4, 8, 8.6, 7.2, 3.8 };
    auto                    actual12 = std::vector<double> { -1.97994, 6.99713, -1.14327 };

    // Unweighted degree-2 fit: residual, coefficients, then fitted y values
    assert(std::fabs(poly_v1.get_residual() - 5.6) < 0.00001);
    for (std::size_t i = 0; i < result1.size(); ++i)
       assert(std::fabs(result1[i] - actual1[i]) < 0.00001);
    for (std::size_t i = 0; i < poly_v1.get_y_fits().size(); ++i)
       assert(std::fabs(poly_v1.get_y_fits()[i] - actual1_y[i]) < 0.01);

    // Weighted degree-2 fit: residual and coefficients
    assert(std::fabs(poly_v12.get_residual() - 0.70981) < 0.00001);
    for (std::size_t i = 0; i < result12.size(); ++i)
       assert(std::fabs(result12[i] - actual12[i]) < 0.00001);

    // Unweighted degree-3 fit on the second data set: coefficients only
    PolyFitVisitor<double>  poly_v2 (3);
    auto                    result2 = df.single_act_visit<double, double>("X2", "Y2", poly_v2).get_result();
    auto                    actual2 = std::vector<double> { -0.0396825, 1.69312, -0.813492, 0.087037 };

    for (std::size_t i = 0; i < result2.size(); ++i)
       assert(std::fabs(result2[i] - actual2[i]) < 0.00001);
}

// -----------------------------------------------------------------------------

static void test_LogFitVisitor()  {

    std::cout << "\nTesting LogFitVisitor{  } ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466,
          123467, 123468, 123469, 123470, 123471, 123472, 123473, 123467, 123468, 123469, 123470, 123471, 123472, 123473,
          123467, 123468, 123469, 123470, 123471, 123472, 123473,
        };
    MyDataFrame                 df;

    df.load_index(std::move(idx));
    df.load_column<double>("X1", { 1, 2, 3, 4, 5 }, nan_policy::dont_pad_with_nans);
    df.load_column<double>("Y1", { 6, 7, 8, 9, 3 }, nan_policy::dont_pad_with_nans);
    df.load_column<double>("X2", { 1, 2, 4, 6, 8 }, nan_policy::dont_pad_with_nans);
    df.load_column<double>("Y2", { 1, 3, 4, 5, 6 }, nan_policy::dont_pad_with_nans);

    LogFitVisitor<double>   log_v1;
    auto                    result1 = df.single_act_visit<double, double>("X1", "Y1", log_v1).get_result();
    auto                    actual1 = std::vector<double> { 6.98618, -0.403317 };
    auto                    actual1_y = std::vector<double> { 6.98618, 6.70662, 6.54309, 6.42706, 6.33706 };

    assert(std::fabs(log_v1.get_residual() - 20.9372) < 0.0001);
    for (size_t i = 0; i < result1.size(); ++i)
       assert(fabs(result1[i] - actual1[i]) < 0.00001);
    for (size_t i = 0; i < log_v1.get_y_fits().size(); ++i)
       assert(fabs(log_v1.get_y_fits()[i] - actual1_y[i]) < 0.01);

    LogFitVisitor<double>   log_v2;
    auto                    result2 = df.single_act_visit<double, double>("X2", "Y2", log_v2).get_result();
    auto                    actual2 = std::vector<double> { 1.11199, 2.25859 };

    assert(std::fabs(log_v2.get_residual() - 0.237476) < 0.00001);
    for (size_t i = 0; i < result2.size(); ++i)
       assert(fabs(result2[i] - actual2[i]) < 0.00001);
}

// -----------------------------------------------------------------------------

static void test_ExponentialFitVisitor()  {

    std::cout << "\nTesting ExponentialFitVisitor{  } ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466,
          123467, 123468, 123469, 123470, 123471, 123472, 123473, 123467, 123468, 123469, 123470, 123471, 123472, 123473,
          123467, 123468, 123469, 123470, 123471, 123472, 123473,
        };
    MyDataFrame                 df;

    df.load_index(std::move(idx));
    df.load_column<double>("X1", { 1, 2, 3, 4, 5 }, nan_policy::dont_pad_with_nans);
    df.load_column<double>("Y1", { 6, 7, 8, 9, 3 }, nan_policy::dont_pad_with_nans);
    df.load_column<double>("X2", { 1, 2, 4, 6, 8 }, nan_policy::dont_pad_with_nans);
    df.load_column<double>("Y2", { 1, 3, 4, 5, 6 }, nan_policy::dont_pad_with_nans);

    ExponentialFitVisitor<double>   exp_v1;
    auto                            result1 = df.single_act_visit<double, double>("X1", "Y1", exp_v1).get_result();
    auto                            actual1 = std::vector<double> { 7.7647, 6.9316, 6.1879, 5.5239, 4.93126 };

    assert(std::fabs(exp_v1.get_residual() - 22.2154) < 0.0001);
    for (size_t i = 0; i < result1.size(); ++i)
        assert(fabs(result1[i] - actual1[i]) < 0.0001);

    efit_v<double>  exp_v2;
    auto            result2 = df.single_act_visit<double, double>("X2", "Y2", exp_v2).get_result();
    auto            actual2 = std::vector<double> { 1.63751, 2.02776, 3.10952, 4.76833, 7.31206 };

    assert(std::fabs(exp_v2.get_residual() - 3.919765) < 0.00001);
    for (size_t i = 0; i < result2.size(); ++i)
        assert(fabs(result2[i] - actual2[i]) < 0.0001);
}

// -----------------------------------------------------------------------------

static void test_LinearFitVisitor()  {

    std::cout << "\nTesting LinearFitVisitor{  } ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466,
          123467, 123468, 123469, 123470, 123471, 123472, 123473, 123467, 123468, 123469, 123470, 123471, 123472, 123473,
          123467, 123468, 123469, 123470, 123471, 123472, 123473,
        };
    MyDataFrame                 df;

    df.load_index(std::move(idx));
    df.load_column<double>("X1", { 1, 2, 3, 4, 5 }, nan_policy::dont_pad_with_nans);
    df.load_column<double>("Y1", { 6, 7, 8, 9, 3 }, nan_policy::dont_pad_with_nans);
    df.load_column<double>("X2", { 1, 2, 4, 6, 8 }, nan_policy::dont_pad_with_nans);
    df.load_column<double>("Y2", { 1, 3, 4, 5, 6 }, nan_policy::dont_pad_with_nans);

    LinearFitVisitor<double>    lin_v1;
    auto                        result1 = df.single_act_visit<double, double>("X1", "Y1", lin_v1).get_result();
    auto                        actual1 = std::vector<double> { 7.4, 7, 6.6, 6.2, 5.8 };

    assert(std::fabs(lin_v1.get_residual() - 19.6) < 0.01);
    for (size_t i = 0; i < result1.size(); ++i)
        assert(fabs(result1[i] - actual1[i]) < 0.0001);

    linfit_v<double>    lin_v2;
    auto                result2 = df.single_act_visit<double, double>("X2", "Y2", lin_v2).get_result();
    auto                actual2 = std::vector<double> { 1.73171, 2.37805, 3.67073, 4.96341, 6.2561 };

    assert(std::fabs(lin_v2.get_residual() - 1.097561) < 0.00001);
    for (size_t i = 0; i < result2.size(); ++i)
        assert(fabs(result2[i] - actual2[i]) < 0.0001);
}
C++ DataFrame