Signature Description

// Selects which absolute-deviation statistic the Hampel filter uses.
// Intended to be passed to HampelFilterVisitor.
enum class hampel_type : unsigned char  {
    mean = 1,   // Use mean absolute deviation
    median = 2, // Use median absolute deviation
};
These are the supported Hampel filter types. They are to be used with HampelFilterVisitor.

Signature Description Parameters
#include <DataFrame/DataFrameTransformVisitors.h>

// Visitor that applies a Hampel filter to a column.
// T is the column data type; I is the index type (defaults to unsigned long).
template<typename T, typename I = unsigned long>
struct HampelFilterVisitor;

// -------------------------------------

// Shorthand alias for HampelFilterVisitor<T, I>.
template<typename T, typename I = unsigned long>
using hamf_v = HampelFilterVisitor<T, I>;
        
This is a “single action visitor”, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This is a transformer visitor. This means the column(s) passed to this visitor are not read-only and their values may change.

This functor class applies a Hampel filter to weed out outliers. It replaces the outliers with NaN. The functor result is the number of items replaced. The filtering uses either the mean absolute deviation or the median absolute deviation.

    // Constructs the filter.
    //   window_size: size of the window the filter works over
    //                (NOTE(review): exact window semantics are not shown
    //                here -- confirm against the implementation)
    //   ht:          which absolute deviation to use (see hampel_type);
    //                defaults to hampel_type::median
    //   num_of_std:  outlier threshold; defaults to 3
    //                (presumably a multiple of the chosen deviation -- confirm)
    explicit
    HampelFilterVisitor(std::size_t window_size,
                        hampel_type ht = hampel_type::median,
                        T num_of_std = 3);
        
T: Column data type.
I: Index type.
// Exercises HampelFilterVisitor on a 35-element double column containing the
// values -12.34 and 50.8. The visitor is built with a window size of 7,
// hampel_type::mean and num_of_std of 2. The test expects exactly those two
// values to be replaced with NaN (visitor result == 2) and every other value
// to be left unchanged.
static void test_HampelFilterVisitor()  {

    // std::endl is kept so the banner is flushed before any assert can abort.
    std::cout << "\nTesting HampelFilterVisitor{  } ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466,
          123467, 123468, 123469, 123470, 123471, 123472, 123473, 123467, 123468, 123469, 123470, 123471, 123472, 123473,
          123467, 123468, 123469, 123470, 123471, 123472, 123473,
        };
    std::vector<double>         d1 =
        { 2.5, 2.45, -1.65, -0.1, -1.1, 1.87, 0.98, 0.34, 1.56, -12.34, 2.3, -0.34, -1.9, 0.387,
          0.123, 1.06, -0.65, 2.03, 0.4, -1.0, 0.59, 0.125, 1.9, -0.68, 2.0045, 50.8, -1.0, 0.78,
          0.48, 1.99, -0.97, 1.03, 8.678, -1.4, 1.59,
        };
    MyDataFrame                 df;

    // idx is moved into the frame here and must not be used afterwards.
    df.load_data(std::move(idx), std::make_pair("dbl_col", d1));

    std::cout << std::endl;
    HampelFilterVisitor<double> hf_v(7, hampel_type::mean, 2);
    auto                        result = df.single_act_visit<double>("dbl_col", hf_v).get_result();

    // Same data as d1, with -12.34 and 50.8 replaced by NaN.
    std::vector<double>         hampel_result = {
        2.5, 2.45, -1.65, -0.1, -1.1, 1.87, 0.98, 0.34, 1.56,
        std::numeric_limits<double>::quiet_NaN(), 2.3, -0.34, -1.9, 0.387,
        0.123, 1.06, -0.65, 2.03, 0.4, -1, 0.59, 0.125, 1.9, -0.68, 2.0045,
        std::numeric_limits<double>::quiet_NaN(), -1, 0.78, 0.48, 1.99,
        -0.97, 1.03, 8.678, -1.4, 1.59
    };
    const auto                  &column = df.get_column<double>("dbl_col");

    assert(result == 2);

    // Guard the indexed comparison below: without this a shorter column
    // would be read out of bounds instead of failing the test cleanly.
    assert(column.size() == hampel_result.size());

    // The index is named i (not idx) so it no longer shadows the moved-from
    // idx vector declared above.
    for (std::size_t i = 0; i < hampel_result.size(); ++i)  {
        const auto  v = column[i];

        // NaN never compares equal to itself, so NaN slots are matched with
        // isnan; all other slots must be bit-for-bit untouched by the filter.
        if (std::isnan(v))
            assert(std::isnan(hampel_result[i]));
        else
            assert(hampel_result[i] == v);
    }
}
C++ DataFrame