Signature | Description | Parameters |
---|---|---|
#include <DataFrame/DataFrameStatsVisitors.h> template<std::size_t N, typename T, typename I = unsigned long, std::size_t A = 0> struct ModeVisitor; // ------------------------------------- template<std::size_t N, typename T, typename I = unsigned long, std::size_t A = 0> using mode_v = ModeVisitor<N, T, I, A>; |
This is a “single action visitor”, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface. This functor class finds the N highest modes (the N most repeated values) of the given column. The result is an array of N items, each of this type: template<typename U> using vec_type = std::vector<U, typename allocator_declare<U, A>::type>; struct DataItem { T value; // Value of the column item VectorConstPtrView<I, A> indices; // List of indices where value occurred // Number of times value occurred inline std::size_t repeat_count() const { return (indices.size()); } // List of column indices where value occurred vec_type<std::size_t> value_indices_in_col; }; |
N: Number of modes to find. T: Column data type. I: Index type. A: Memory alignment boundary for vectors; the default is the system default alignment. |
static void test_mode() { std::cout << "\nTesting Mode ..." << std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466, 123467, 123468, 123469, 123470, 123471, 123472, 123473 }; std::vector<double> d1 = { 1.0, 10, 8, 18, 19, 16, 21, 17, 20, 3, 2, 11, 7.0, 5, 9, 15, 14, 13, 12, 6, 4 }; std::vector<double> d2 = { 1.0, 10, 8, 18, 19, 16, 17, 20, 3, 2, 11, 7.0, 5, 9, 15, 14, 13, 12, 6, 4 }; std::vector<int> i1 = { 1, 1, 2, 4, 3, 4, 5, 2, 1, 2, 2, 3, 4, 5, 7, 1, 2, 3, 2, 6, 4 }; std::vector<int> i2 = { 1, 10, 8, 18, 19, 16, 17, 20, 3, 2, 11, 7, 5, 9, 15, 14, 13, 12, 6, 4 }; std::vector<double> d3 = { 1, 10, std::numeric_limits<double>::quiet_NaN(), 18, 19, 16, 17, 20, std::numeric_limits<double>::quiet_NaN(), 2, 11, 7, std::numeric_limits<double>::quiet_NaN(), 5, 9, 15, 14, 13, 12, 6 }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("dblcol_1", d1), std::make_pair("intcol_1", i1)); df.load_column("dblcol_2", std::move(d2), nan_policy::dont_pad_with_nans); df.load_column("intcol_2", std::move(i2), nan_policy::dont_pad_with_nans); df.load_column("dblcol_3", std::move(d3), nan_policy::dont_pad_with_nans); ModeVisitor<3, double> mode_visit; const auto &result = df.single_act_visit<double>("dblcol_3", mode_visit).get_result(); assert(result.size() == 3); assert(result[0].indices.size() == 3); assert(result[0].value_indices_in_col.size() == 3); assert(std::isnan(result[0].get_value())); assert(result[0].repeat_count() == 3); assert(result[0].indices[1] == 123458); assert(result[0].value_indices_in_col[2] == 12); ModeVisitor<4, int> mode_visit2; const auto &result2 = df.single_act_visit<int>("intcol_1", mode_visit2).get_result(); assert(result2.size() == 4); assert(result2[0].indices.size() == 6); assert(result2[0].value_indices_in_col.size() == 6); assert(result2[0].repeat_count() == 6); assert(result2[0].get_value() == 2); assert(result2[1].repeat_count() == 4); }