Signature | Description | Parameters |
---|---|---|
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long, typename Cmp = std::less<T>> struct ExtremumSubArrayVisitor; // ------------------------------------- template<typename T, typename I = unsigned long> using MaxSubArrayVisitor = ExtremumSubArrayVisitor<T, I, std::less<T>>; template<typename T, typename I = unsigned long> using MinSubArrayVisitor = ExtremumSubArrayVisitor<T, I, std::greater<T>>; |
This functor finds the subinterval within the given column that contains elements with maximum or minimum (depending on Cmp. The default is maximum) sum than any other interval. It does it in O(n). In the constructor, the user can specify maximum and minimum values to consider. The default is to consider all values. get_result() returns the maximum sum. get_begin_idx() returns the beginning index of the interval get_end_idx() returns the end (exclusive) index of the interval explicit ExtremumSubArrayVisitor(T min_to_consider = -std::numeric_limits<T>::max(), T max_to_consider = std::numeric_limits<T>::max()); |
T: Column data type. T must be an arithmetic-enabled type I: Index type. |
#include <DataFrame/DataFrameStatsVisitors.h> template<std::size_t N, typename T, typename I = unsigned long, typename Cmp = std::less<T>, std::size_t A = 0> struct NExtremumSubArrayVisitor; // ------------------------------------- template<std::size_t N, typename T, typename I, std::size_t A = 0> using NMaxSubArrayVisitor = NExtremumSubArrayVisitor<N, T, I, std::less<T>, A>; template<std::size_t N, typename T, typename I, std::size_t A = 0> using NMinSubArrayVisitor = NExtremumSubArrayVisitor<N, T, I, std::greater<T>, A>; |
This functor is similar to the above ExtremumSubArrayVisitor, but it finds at most N subintervals within the given column that contain elements with maximum or minimum sum than any other interval. It does it in O(n + kNlog(N)); where n = column length, k = a multiplier less than n, N = number of max sums to find The reason these two visitors are separated is because of performance complexity get_result() returns a std::vector of the folloing struct sorted Cmp parameter. struct SubArrayInfo { T sum { }; std::size_t begin_index { 0 }; std::size_t end_index { 0 }; }; explicit NExtremumSubArrayVisitor(T min_to_consider = -std::numeric_limits<T>::max(), T max_to_consider = std::numeric_limits<T>::max()); |
T: Column data type. T must be an arithmetic-enabled type I: Index type A: Memory alignment boundary for vectors. Default is system default alignment |
static void test_ExtremumSubArrayVisitor() { std::cout << "\nTesting ExtremumSubArrayVisitor{ } ..." << std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466, 123467, 123468, 123469, 123470, 123471, 123472, 123473, 123467, 123468, 123469, 123470, 123471, 123472, 123473, 123467, 123468, 123469, 123470, 123471, 123472, 123473, }; MyDataFrame df; df.load_index(std::move(idx)); df.load_column<double>("dbl_col1", { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, nan_policy::dont_pad_with_nans); df.load_column<double>("dbl_col2", { -3, 1, -8, 4, -1, 2, 1, -5, 5 }, nan_policy::dont_pad_with_nans); ExtremumSubArrayVisitor<double> msa_v; df.visit<double>("dbl_col1", msa_v); assert(msa_v.get_result() == 55); assert(msa_v.get_begin_idx() == 0); assert(msa_v.get_end_idx() == 10); df.visit<double>("dbl_col2", msa_v); assert(msa_v.get_result() == 6); assert(msa_v.get_begin_idx() == 3); assert(msa_v.get_end_idx() == 7); } // ----------------------------------------------------------------------------- static void test_NExtremumSubArrayVisitor() { std::cout << "\nTesting NExtremumSubArrayVisitor{ } ..." << std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466, 123467, 123468, 123469, 123470, 123471, 123472, 123473, 123467, 123468, 123469, 123470, 123471, 123472, 123473, 123467, 123468, 123469, 123470, 123471, 123472, 123473, }; MyDataFrame df; df.load_index(std::move(idx)); df.load_column<double>("dbl_col1", { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, nan_policy::dont_pad_with_nans); df.load_column<double>("dbl_col2", { -3, 1, -8, 4, -1, 2, 1, -5, 5 }, nan_policy::dont_pad_with_nans); NExtremumSubArrayVisitor<5, double> msa_v; df.visit<double>("dbl_col1", msa_v); assert((msa_v.get_result()[0].sum == 21)); assert((msa_v.get_result()[0].begin_index == 0)); assert((msa_v.get_result()[0].end_index == 6)); assert((msa_v.get_result()[1].sum == 28)); assert((msa_v.get_result()[1].begin_index == 0)); assert((msa_v.get_result()[1].end_index == 7)); assert((msa_v.get_result()[2].sum == 36)); assert((msa_v.get_result()[2].begin_index == 0)); assert((msa_v.get_result()[2].end_index == 8)); assert((msa_v.get_result()[3].sum == 45)); assert((msa_v.get_result()[3].begin_index == 0)); assert((msa_v.get_result()[3].end_index == 9)); assert((msa_v.get_result()[4].sum == 55)); assert((msa_v.get_result()[4].begin_index == 0)); assert((msa_v.get_result()[4].end_index == 10)); NExtremumSubArrayVisitor<4, double> msa_v2; df.visit<double>("dbl_col2", msa_v2); assert((msa_v2.get_result()[0].sum == 1)); assert((msa_v2.get_result()[0].begin_index == 1)); assert((msa_v2.get_result()[0].end_index == 2)); assert((msa_v2.get_result()[1].sum == 4)); assert((msa_v2.get_result()[1].begin_index == 3)); assert((msa_v2.get_result()[1].end_index == 4)); assert((msa_v2.get_result()[2].sum == 5)); assert((msa_v2.get_result()[2].begin_index == 3)); assert((msa_v2.get_result()[2].end_index == 6)); assert((msa_v2.get_result()[3].sum == 6)); assert((msa_v2.get_result()[3].begin_index == 3)); assert((msa_v2.get_result()[3].end_index == 7)); NExtremumSubArrayVisitor<5, double, unsigned long, std::greater<double>> msa_v3; df.visit<double>("dbl_col1", msa_v3); assert((msa_v3.get_result()[0].sum == 1)); assert((msa_v3.get_result()[0].begin_index == 0)); assert((msa_v3.get_result()[0].end_index == 1)); df.visit<double>("dbl_col2", msa_v3); assert((msa_v3.get_result()[0].sum == -3)); assert((msa_v3.get_result()[0].begin_index == 0)); assert((msa_v3.get_result()[0].end_index == 1)); assert((msa_v3.get_result()[1].sum == -10)); assert((msa_v3.get_result()[1].begin_index == 0)); assert((msa_v3.get_result()[1].end_index == 3)); }