Signature Description Parameters

template<typename T, typename I_V, typename ... Ts>
DataFrame
groupby1(const char *col_name,
         I_V &&idx_visitor,
         Ts&& ... args) const; 
        
This method groups the DataFrame by the named column of type T. Grouping is done by equality, so the comparison and equality operators must be well defined for type T. It returns a new DataFrame containing the grouped data.
The summarization of columns is specified by a list of 3-member-tuples (triples) of the following format:
  1. Current DataFrame column name
  2. Column name for the new grouped DataFrame
  3. A visitor to aggregate current column to new column
You must also specify how the index column is summarized, by providing a visitor.
If the named column is not the index column, the returned DataFrame also has a column with the same name, which contains the unique values of the named column.
Also see bucketize().
T: Type of the group-by column. If the group-by column is the index, it is the type of the index
I_V: Type of visitor to be used to summarize the index column
Ts: Types of triples to specify the column summarization
col_name: Name of the column to group by
idx_visitor: A visitor to specify the index summarization
args: List of triples to specify the column summarization

template<typename T1, typename T2, typename I_V,
         typename ... Ts>
DataFrame
groupby2(const char *col_name1,
         const char *col_name2,
         I_V &&idx_visitor,
         Ts&& ... args) const; 
        
This is the same as groupby1() above, but it groups by two columns
T1: Type of the first group-by column. If that column is the index, it is the type of the index
T2: Type of the second group-by column. If that column is the index, it is the type of the index
I_V: Type of visitor to be used to summarize the index column
Ts: Types of triples to specify the column summarization
col_name1: Name of the first column to group by
col_name2: Name of the second column to group by
idx_visitor: A visitor to specify the index summarization
args: List of triples to specify the column summarization

template<typename T1, typename T2, typename T3,
         typename I_V, typename ... Ts>
DataFrame
groupby3(const char *col_name1,
         const char *col_name2,
         const char *col_name3,
         I_V &&idx_visitor,
         Ts&& ... args) const; 
        
This is the same as groupby2() above, but it groups by three columns
T1: Type of the first group-by column. If that column is the index, it is the type of the index
T2: Type of the second group-by column. If that column is the index, it is the type of the index
T3: Type of the third group-by column. If that column is the index, it is the type of the index
I_V: Type of visitor to be used to summarize the index column
Ts: Types of triples to specify the column summarization
col_name1: Name of the first column to group by
col_name2: Name of the second column to group by
col_name3: Name of the third column to group by
idx_visitor: A visitor to specify the index summarization
args: List of triples to specify the column summarization

template<typename T, typename I_V, typename ... Ts>
std::future<DataFrame>
groupby1_async(const char *col_name,
               I_V &&idx_visitor,
               Ts&& ... args) const; 
        
Same as groupby1() above, but executed asynchronously

template<typename T1, typename T2, typename I_V,
         typename ... Ts>
std::future<DataFrame>
groupby2_async(const char *col_name1,
               const char *col_name2,
               I_V &&idx_visitor,
               Ts&& ... args) const; 
        
Same as groupby2() above, but executed asynchronously

template<typename T1, typename T2, typename T3,
         typename I_V, typename ... Ts>
std::future<DataFrame>
groupby3_async(const char *col_name1,
               const char *col_name2,
               const char *col_name3,
               I_V &&idx_visitor,
               Ts&& ... args) const; 
        
Same as groupby3() above, but executed asynchronously
// Exercises groupby1_async() three times on the same DataFrame: grouping by
// the index column, by the "ul_col" data column and by the "dbl_col_2" data
// column. Every grouped DataFrame is written to std::cout in csv2 format;
// the test makes no assertions of its own.
static void test_groupby()  {

    std::cout << "\nTesting groupby( ) ..." << std::endl;

    // Index values. Several of them repeat (e.g. 123450), so a group can
    // hold more than one row.
    std::vector<unsigned long>  idx_vals =
        { 123450, 123451, 123452, 123450, 123455, 123450, 123449,
          123450, 123451, 123450, 123452, 123450, 123455, 123450,
          123454, 123450, 123450, 123457, 123458, 123459, 123450,
          123441, 123442, 123432, 123450, 123450, 123435, 123450 };
    // Copy of the index values, loaded as a regular column ("ul_col").
    // It must be taken before idx_vals is moved into the DataFrame.
    std::vector<unsigned long>  ul_col_vals(idx_vals);
    std::vector<std::string>    str_vals =
        { "A", "B", "C", "D", "X", "Y", "W", "P", "Z", "S", "M", "B", "A", "H",
          "X", "Q", "V", "P", "W", "K", "I", "L", "J", "N", "Y", "G", "T", "U" };
    std::vector<int>            int_vals =
        { 1, 2, 3, 4, 5, 3, 7, 3, 9, 10, 3, 2, 3, 14,
          2, 2, 2, 3, 2, 3, 3, 3, 3, 3, 36, 2, 45, 2 };
    std::vector<double>         dbl_vals =
        { 10, 20, 11, 11, 30, 40, 50, 40, 60, 70, 80, 90, 50, 100,
          11, 25, 20, 30, 1, 3, 4, 12, 6, 2, 3, 10, 4, 5 };
    std::vector<double>         dbl2_vals =
        { 0.998, 1.545, 0.056, 0.15678, 1.545, 0.923, 0.06743,
          0.1, -1.545, 0.07865, -0.9999, 1.545, 0.1002, -0.8888,
          0.14, 0.0456, -1.545, -0.8999, 0.01119, 0.8002, -1.545,
          0.2, 0.1056, 0.87865, -0.6999, 1.545, 0.1902, -1.545 };

    MyDataFrame df;

    df.load_data(
        std::move(idx_vals),
        std::make_pair("xint_col", int_vals),
        std::make_pair("dbl_col", dbl_vals),
        std::make_pair("dbl_col_2", dbl2_vals),
        std::make_pair("str_col", str_vals),
        std::make_pair("ul_col", ul_col_vals));

    // Case 1: group by the index column; LastVisitor summarizes the index.
    auto    by_index =
        df.groupby1_async<unsigned long>(
            DF_INDEX_COL_NAME,
            LastVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
            std::make_tuple("str_col", "sum_str", SumVisitor<std::string>()),
            std::make_tuple("xint_col", "max_int", MaxVisitor<int>()),
            std::make_tuple("xint_col", "min_int", MinVisitor<int>()),
            std::make_tuple("dbl_col", "sum_dbl", SumVisitor<double>())).get();

    by_index.write<std::ostream, std::string, double, int>(
        std::cout, io_format::csv2);

    // Case 2: group by the unsigned long data column "ul_col". The column
    // type list passed to write() additionally names unsigned long here.
    auto    by_ul_col =
        df.groupby1_async<unsigned long>(
            "ul_col",
            LastVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
            std::make_tuple("str_col", "sum_str", SumVisitor<std::string>()),
            std::make_tuple("xint_col", "max_int", MaxVisitor<int>()),
            std::make_tuple("xint_col", "min_int", MinVisitor<int>()),
            std::make_tuple("dbl_col", "sum_dbl", SumVisitor<double>())).get();

    by_ul_col.write<std::ostream, std::string, double, int, unsigned long>(
        std::cout, io_format::csv2);

    // Case 3: group by the double data column "dbl_col_2"; MaxVisitor
    // summarizes the index.
    auto    by_dbl_col_2 =
        df.groupby1_async<double>(
            "dbl_col_2",
            MaxVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
            std::make_tuple("str_col", "sum_str", SumVisitor<std::string>()),
            std::make_tuple("xint_col", "max_int", MaxVisitor<int>()),
            std::make_tuple("xint_col", "min_int", MinVisitor<int>()),
            std::make_tuple("dbl_col", "sum_dbl", SumVisitor<double>())).get();

    by_dbl_col_2.write<std::ostream, std::string, double, int>(
        std::cout, io_format::csv2);
}

// -----------------------------------------------------------------------------

// Exercises groupby2() with five different pairs of group-by columns
// (index/data and data/data combinations, in both orders) and with different
// index visitors. Every grouped DataFrame is written to std::cout in csv2
// format; the test makes no assertions of its own.
static void test_groupby_2()  {

    std::cout << "\nTesting groupby_2( ) ..." << std::endl;

    // Index values. Several of them repeat (e.g. 123450).
    std::vector<unsigned long>  idx_vals =
        { 123450, 123451, 123452, 123450, 123455, 123450, 123449,
          123450, 123451, 123450, 123452, 123450, 123455, 123450,
          123454, 123450, 123450, 123457, 123458, 123459, 123450,
          123441, 123442, 123432, 123450, 123450, 123435, 123450 };
    // Copy of the index values, loaded as a regular column ("ul_col").
    // It must be taken before idx_vals is moved into the DataFrame.
    std::vector<unsigned long>  ul_col_vals(idx_vals);
    std::vector<std::string>    str_vals =
        { "A", "B", "C", "D", "X", "Y", "W", "P", "Z", "S", "M", "B", "A", "H",
          "X", "Q", "V", "P", "W", "K", "I", "L", "J", "N", "Y", "G", "T", "U" };
    std::vector<int>            int_vals =
        { 1, 2, 3, 4, 5, 3, 7, 3, 9, 10, 3, 2, 3, 14,
          2, 2, 2, 3, 2, 3, 3, 3, 3, 3, 36, 2, 45, 2 };
    std::vector<double>         dbl_vals =
        { 10, 20, 11, 11, 30, 40, 50, 40, 60, 70, 80, 90, 50, 100,
          11, 25, 20, 30, 1, 3, 4, 12, 6, 2, 3, 10, 4, 5 };
    std::vector<double>         dbl2_vals =
        { 0.998, 1.545, 0.056, 0.15678, 1.545, 0.923, 0.06743,
          0.1, -1.545, 0.07865, -0.9999, 1.545, 0.1002, -0.8888,
          0.14, 0.0456, -1.545, -0.8999, 0.01119, 0.8002, -1.545,
          0.2, 0.1056, 0.87865, -0.6999, 1.545, 0.1902, -1.545 };

    MyDataFrame df;

    df.load_data(
        std::move(idx_vals),
        std::make_pair("xint_col", int_vals),
        std::make_pair("dbl_col", dbl_vals),
        std::make_pair("dbl_col_2", dbl2_vals),
        std::make_pair("str_col", str_vals),
        std::make_pair("ul_col", ul_col_vals));

    // Case 1: keys are (index, "dbl_col_2"); LastVisitor summarizes the index.
    auto    by_idx_dbl2 =
        df.groupby2<unsigned long, double>(
            DF_INDEX_COL_NAME,
            "dbl_col_2",
            LastVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
            std::make_tuple("str_col", "sum_str", SumVisitor<std::string>()),
            std::make_tuple("xint_col", "max_int", MaxVisitor<int>()),
            std::make_tuple("xint_col", "min_int", MinVisitor<int>()),
            std::make_tuple("dbl_col_2", "cnt_dbl", CountVisitor<double>()),
            std::make_tuple("dbl_col", "sum_dbl", SumVisitor<double>()));

    by_idx_dbl2.write<std::ostream, std::string, double, std::size_t, int>(
        std::cout, io_format::csv2);

    // Case 2: same keys in the opposite order; MinVisitor summarizes the index.
    auto    by_dbl2_idx =
        df.groupby2<double, unsigned long>(
            "dbl_col_2",
            DF_INDEX_COL_NAME,
            MinVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
            std::make_tuple("str_col", "sum_str", SumVisitor<std::string>()),
            std::make_tuple("xint_col", "max_int", MaxVisitor<int>()),
            std::make_tuple("xint_col", "min_int", MinVisitor<int>()),
            std::make_tuple("dbl_col_2", "cnt_dbl", CountVisitor<double>()),
            std::make_tuple("dbl_col", "sum_dbl", SumVisitor<double>()));

    by_dbl2_idx.write<std::ostream, std::string, double, std::size_t, int>(
        std::cout, io_format::csv2);

    // Case 3: keys are two data columns ("dbl_col_2", "xint_col");
    // MaxVisitor summarizes the index.
    auto    by_dbl2_int =
        df.groupby2<double, int>(
            "dbl_col_2",
            "xint_col",
            MaxVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
            std::make_tuple("str_col", "sum_str", SumVisitor<std::string>()),
            std::make_tuple("xint_col", "max_int", MaxVisitor<int>()),
            std::make_tuple("xint_col", "min_int", MinVisitor<int>()),
            std::make_tuple("dbl_col_2", "cnt_dbl", CountVisitor<double>()),
            std::make_tuple("dbl_col", "sum_dbl", SumVisitor<double>()));

    by_dbl2_int.write<std::ostream, std::string, double, std::size_t, int>(
        std::cout, io_format::csv2);

    // Case 4: the same two data columns in the opposite order;
    // FirstVisitor summarizes the index.
    auto    by_int_dbl2 =
        df.groupby2<int, double>(
            "xint_col",
            "dbl_col_2",
            FirstVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
            std::make_tuple("str_col", "sum_str", SumVisitor<std::string>()),
            std::make_tuple("xint_col", "max_int", MaxVisitor<int>()),
            std::make_tuple("xint_col", "min_int", MinVisitor<int>()),
            std::make_tuple("dbl_col_2", "cnt_dbl", CountVisitor<double>()),
            std::make_tuple("dbl_col", "sum_dbl", SumVisitor<double>()));

    by_int_dbl2.write<std::ostream, std::string, double, std::size_t, int>(
        std::cout, io_format::csv2);

    // Case 5: keys are ("str_col", index); "str_col" is both a group-by key
    // and the source of the "sum_str" aggregate.
    auto    by_str_idx =
        df.groupby2<std::string, unsigned long>(
            "str_col",
            DF_INDEX_COL_NAME,
            FirstVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
            std::make_tuple("str_col", "sum_str", SumVisitor<std::string>()),
            std::make_tuple("xint_col", "max_int", MaxVisitor<int>()),
            std::make_tuple("xint_col", "min_int", MinVisitor<int>()),
            std::make_tuple("dbl_col_2", "cnt_dbl", CountVisitor<double>()),
            std::make_tuple("dbl_col", "sum_dbl", SumVisitor<double>()));

    by_str_idx.write<std::ostream, std::string, double, std::size_t, int>(
        std::cout, io_format::csv2);
}
// -----------------------------------------------------------------------------

// Exercises groupby3() once, grouping by the triple
// ("dbl_col", index, "str_col"). The grouped DataFrame is written to
// std::cout in csv2 format; the test makes no assertions of its own.
static void test_groupby_3()  {

    std::cout << "\nTesting groupby_3( ) ..." << std::endl;

    // Index values, listed in non-decreasing order with runs of repeats.
    std::vector<unsigned long>  idx_vals =
        { 1, 2, 2, 2, 3, 4, 5, 5, 6, 6, 6, 7, 8, 9,
          10, 10, 10, 11, 11, 11, 12, 13, 13, 14, 15, 16, 17, 17 };
    // Copy of the index values, loaded as a regular column ("ul_col").
    // It must be taken before idx_vals is moved into the DataFrame.
    std::vector<unsigned long>  ul_col_vals(idx_vals);
    std::vector<std::string>    str_vals =
        { "A", "A", "A", "B", "C", "C", "C", "C", "Z", "S", "M", "B", "A", "H",
          "X", "B", "Y", "Y", "W", "K", "K", "K", "J", "N", "Y", "G", "K", "B" };
    std::vector<int>            int_vals =
        { 1, 2, 3, 4, 5, 3, 7, 3, 9, 10, 3, 2, 3, 14,
          2, 2, 2, 3, 2, 3, 3, 3, 3, 3, 36, 2, 45, 2 };
    std::vector<double>         dbl_vals =
        { 10, 20, 20, 11, 30, 40, 50, 50, 50, 50, 80, 90, 50, 11,
          11, 25, 20, 30, 1, 2, 2, 2, 6, 2, 3, 10, 4, 5 };
    std::vector<double>         dbl2_vals =
        { 0.998, 1.545, 0.056, 0.15678, 1.545, 0.923, 0.06743,
          0.1, -1.545, 0.07865, -0.9999, 1.545, 0.1002, -0.8888,
          0.14, 0.0456, -1.545, -0.8999, 0.01119, 0.8002, -1.545,
          0.2, 0.1056, 0.87865, -0.6999, 1.545, 0.1902, -1.545 };

    MyDataFrame df;

    df.load_data(
        std::move(idx_vals),
        std::make_pair("xint_col", int_vals),
        std::make_pair("dbl_col", dbl_vals),
        std::make_pair("dbl_col_2", dbl2_vals),
        std::make_pair("str_col", str_vals),
        std::make_pair("ul_col", ul_col_vals));

    // Keys are ("dbl_col", index, "str_col"); LastVisitor summarizes the index.
    auto    grouped =
        df.groupby3<double, unsigned long, std::string>(
            "dbl_col",
            DF_INDEX_COL_NAME,
            "str_col",
            LastVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
            std::make_tuple("str_col", "sum_str", SumVisitor<std::string>()),
            std::make_tuple("xint_col", "max_int", MaxVisitor<int>()),
            std::make_tuple("xint_col", "min_int", MinVisitor<int>()),
            std::make_tuple("dbl_col_2", "cnt_dbl", CountVisitor<double>()),
            std::make_tuple("dbl_col", "sum_dbl", SumVisitor<double>()));

    grouped.write<std::ostream, std::string, double, std::size_t, int>(
        std::cout, io_format::csv2);
}