Signature Description Parameters

#include <DataFrame/DataFrameStatsVisitors.h>

// Visitor that collects the N extreme values of a column. The comparison
// functor Cmp selects the direction: std::less<T> (the default) yields the
// N largest values.
//   N:   number of extreme values to collect
//   T:   column data type
//   I:   index type
//   Cmp: comparison functor type
template<std::size_t N, typename T, typename I = unsigned long,
         typename Cmp = std::less<T>>
struct  NExtremumVisitor;

// -------------------------------------

// Convenience alias: NExtremumVisitor with Cmp fixed to std::less<T>
// (N largest values).
template<std::size_t N, typename T, typename I = unsigned long>
using NLargestVisitor = NExtremumVisitor<N, T, I, std::less<T>>;
// Convenience alias: NExtremumVisitor with Cmp fixed to std::greater<T>
// (N smallest values).
template<std::size_t N, typename T, typename I = unsigned long>
using NSmallestVisitor = NExtremumVisitor<N, T, I, std::greater<T>>;
        
This functor class calculates the N smallest or largest values of a column, depending on the type of Cmp (the default is N largest). It runs in O(N*M), where N is the number of extreme values requested and M is the total number of values in the column.
If N is relatively small, this is better than O(M*logM). The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is true.
N: Number of largest/smallest values to find
T: Column data type
I: Index type
static void test_largest_smallest_visitors()  {

    std::cout << "\nTesting Largest/Smallest visitors ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9, 10, 11, 12, 13, 14, 20, 22, 23, 30, 31, 32, 1.89};
    std::vector<double> d3 = { 15, 16, 15, 18, 19, 16, 21, 0.34, 1.56, 0.34, 2.3, 0.34, 19.0 };
    std::vector<int>    i1 = { 22, 23, 24, 25, 99 };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", i1));

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, double, int>(std::cout);

    NLargestVisitor<5, double> nl_visitor;

    df.visit<double>("col_3", nl_visitor);
    std::cout << "N largest result for col_3:" << std::endl;
    for (auto iter : nl_visitor.get_result())
        std::cout << iter.index << '|' << iter.value << " ";
    std::cout << std::endl;
    nl_visitor.sort_by_index();
    std::cout << "N largest result for col_3 sorted by index:" << std::endl;
    for (auto iter : nl_visitor.get_result())
        std::cout << iter.index << '|' << iter.value << " ";
    std::cout << std::endl;
    nl_visitor.sort_by_value();
    std::cout << "N largest result for col_3 sorted by value:" << std::endl;
    for (auto iter : nl_visitor.get_result())
        std::cout << iter.index << '|' << iter.value << " ";
    std::cout << std::endl;

    NSmallestVisitor<5, double> ns_visitor;

    df.visit<double>("col_3", ns_visitor);
    std::cout << "N smallest result for col_3:" << std::endl;
    for (auto iter : ns_visitor.get_result())
        std::cout << iter.index << '|' << iter.value << " ";
    std::cout << std::endl;
    ns_visitor.sort_by_index();
    std::cout << "N smallest result for col_3 sorted by index:" << std::endl;
    for (auto iter : ns_visitor.get_result())
        std::cout << iter.index << '|' << iter.value << " ";
    std::cout << std::endl;
    ns_visitor.sort_by_value();
    std::cout << "N smallest result for col_3 sorted by value:" << std::endl;
    for (auto iter : ns_visitor.get_result())
        std::cout << iter.index << '|' << iter.value << " ";
    std::cout << std::endl;
}
C++ DataFrame