Signature | Description | Parameters |
---|---|---|
#include <DataFrame/DataFrameStatsVisitors.h> template<std::size_t N, typename T, typename I = unsigned long, typename Cmp = std::less<T>> struct NExtremumVisitor; // ------------------------------------- template<std::size_t N, typename T, typename I = unsigned long> using NLargestVisitor = NExtremumVisitor<N, T, I, std::less<T>>; template<std::size_t N, typename T, typename I = unsigned long> using NSmallestVisitor = NExtremumVisitor<N, T, I, std::greater<T>>; |
This functor class finds the N largest or N smallest values of a column, depending on the type of Cmp (the default is the N largest). It runs in O(N*M), where N is the number of extreme values requested and M is the total number of values in the column. If N is relatively small, this is better than the O(M*logM) of a full sort. The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is true. |
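N: Number of largest/smallest values to find T: Column data type I: Index type Cmp: Comparison functor type; std::less<T> (the default) selects the largest values, std::greater<T> selects the smallest |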
static void test_largest_smallest_visitors() { std::cout << "\nTesting Largest/Smallest visitors ..." << std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; std::vector<double> d2 = { 8, 9, 10, 11, 12, 13, 14, 20, 22, 23, 30, 31, 32, 1.89}; std::vector<double> d3 = { 15, 16, 15, 18, 19, 16, 21, 0.34, 1.56, 0.34, 2.3, 0.34, 19.0 }; std::vector<int> i1 = { 22, 23, 24, 25, 99 }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", i1)); std::cout << "Original DF:" << std::endl; df.write<std::ostream, double, int>(std::cout); NLargestVisitor<5, double> nl_visitor; df.visit<double>("col_3", nl_visitor); std::cout << "N largest result for col_3:" << std::endl; for (auto iter : nl_visitor.get_result()) std::cout << iter.index << '|' << iter.value << " "; std::cout << std::endl; nl_visitor.sort_by_index(); std::cout << "N largest result for col_3 sorted by index:" << std::endl; for (auto iter : nl_visitor.get_result()) std::cout << iter.index << '|' << iter.value << " "; std::cout << std::endl; nl_visitor.sort_by_value(); std::cout << "N largest result for col_3 sorted by value:" << std::endl; for (auto iter : nl_visitor.get_result()) std::cout << iter.index << '|' << iter.value << " "; std::cout << std::endl; NSmallestVisitor<5, double> ns_visitor; df.visit<double>("col_3", ns_visitor); std::cout << "N smallest result for col_3:" << std::endl; for (auto iter : ns_visitor.get_result()) std::cout << iter.index << '|' << iter.value << " "; std::cout << std::endl; ns_visitor.sort_by_index(); std::cout << "N smallest result for col_3 sorted by index:" << std::endl; for (auto iter : ns_visitor.get_result()) std::cout << iter.index << '|' << iter.value << " "; std::cout << std::endl; 
ns_visitor.sort_by_value(); std::cout << "N smallest result for col_3 sorted by value:" << std::endl; for (auto iter : ns_visitor.get_result()) std::cout << iter.index << '|' << iter.value << " "; std::cout << std::endl; }