// One-hot encodes a categorical (nominal) column: for each category found in
// the column named cat_col_name, a numeric indicator column of 0/1 values is
// generated.
//   V:  type of the named categorical column; its values must be hashable and
//       convertible to string (the strings name the indicator columns)
//   IT: type of the generated indicator columns (defaults to int)
//   cat_col_name: name of the categorical column
//   numeric_cols_prefix: optional prefix for the generated column names
// NOTE(review): the meaning of the returned count is not stated here; the
// examples only print it as "Load count" -- confirm against the implementation.
template<typename V, typename IT = int>
std::size_t
load_indicators(const char *cat_col_name,
                const char *numeric_cols_prefix = nullptr);
        
Given a categorical (nominal) column, it generates a series of numerical columns (0 or 1), one for each category in the given column.
These numeric indicators encode categorical information.
In machine learning, this is also sometimes referred to as “one-hot” encoding of categorical data.

NOTE: The values of the categorical column must be convertible to a string, because the strings are used to generate the names of the indicator columns
NOTE: The values of the categorical column must be hashable
V: Type of the named categorical column
IT: Type of the generated numerical indicator columns
cat_col_name: Name of the categorical column
numeric_cols_prefix: Optional prefix for generated column names

// Reconstructs a categorical column from a set of one-hot indicator columns;
// this is the inverse of load_indicators().
//   V:  type of the indicator columns (defaults to int)
//   CT: type of the new categorical column (defaults to std::string)
//   ind_col_names: names of the indicator columns
//   cat_col_name: name of the new categorical column
//   numeric_cols_prefix: prefix of the indicator column names, if any
// NOTE(review): the element type of ind_col_names was lost in this listing;
// const char * is restored to match the string-literal lists passed by the
// examples -- confirm against the library header.
template<typename V = int, typename CT = std::string>
std::size_t
from_indicators(const std::vector<const char *> &ind_col_names,
                const char *cat_col_name,
                const char *numeric_cols_prefix = nullptr);
        
This does the opposite of load_indicators(). Given a set of one-hot indicator columns, it reconstructs the categorical column.
V: Type of the indicator columns
CT: Type of the new categorical column
ind_col_names: Names of the indicator columns
cat_col_name: Name of the new categorical column
numeric_cols_prefix: Prefix of indicator column names
// Demonstrates load_indicators(): builds a small frame with a string column
// ("colors") and a double column ("levels"), then generates indicator columns
// three times -- int indicators without a prefix, bool indicators prefixed
// "bool_", and double indicators prefixed "level_" -- dumping the frame and
// the returned count after each step.
static void test_load_indicators()  {

    std::cout << "\nTesting load_indicators( ) ..." << std::endl;

    // Prints the value returned by a load_indicators() call.
    const auto  report_count = [](const auto &loaded)  {
        std::cout << "Load count: " << loaded << std::endl;
    };

    std::vector<unsigned long>  index {
        1UL, 2UL, 3UL, 10UL, 5UL, 7UL, 8UL, 12UL, 9UL, 12UL, 10UL, 13UL, 10UL, 15UL, 14UL
    };
    std::vector<double>         levels {
        0.0, 15.0, 14.0, 0.0, 1.0, 14.0, 11.5, 11.5, 7.25, 7.25, 7.25, 14.0, 7.25, 15.0, 0.0
    };
    std::vector<double>         second_dbl {
        100.0, 101.0, 102.0, 103.0, 104.0, 105.0, 106.55, 107.34, 1.8, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0
    };
    std::vector<std::string>    colors {
        "blue", "blue", "red", "green", "black", "green", "white", "black",
        "black", "white", "red", "yellow", "green", "green", "green"
    };
    // Deliberately shorter than the index; loaded without NaN padding below.
    std::vector<int>            ints { 1, 2, 3, 4, 5, 8, 6, 7, 11, 14, 9 };

    MyDataFrame df;

    df.load_data(std::move(index),
                 std::make_pair("levels", levels),
                 std::make_pair("dbl_col_2", second_dbl),
                 std::make_pair("colors", colors));
    df.load_column("int_col", std::move(ints), nan_policy::dont_pad_with_nans);

    // Frame before any indicator columns exist.
    df.write<std::ostream, std::string, double, int>(std::cout, io_format::csv2);

    // Default indicator type (int), no name prefix.
    const auto  int_loaded = df.load_indicators<std::string>("colors");

    df.write<std::ostream, std::string, double, int>(std::cout, io_format::csv2);
    report_count(int_loaded);

    // bool indicators; bool is added to the written types so they show up.
    const auto  bool_loaded = df.load_indicators<std::string, bool>("colors", "bool_");

    df.write<std::ostream, std::string, double, int, bool>(std::cout, io_format::csv2);
    report_count(bool_loaded);

    // Indicators for a numeric categorical column.
    const auto  level_loaded = df.load_indicators<double, double>("levels", "level_");

    df.write<std::ostream, std::string, double, int, bool>(std::cout, io_format::csv2);
    report_count(level_loaded);
}

// -----------------------------------------------------------------------------

static void test_from_indicators()  {

    std::cout << "\nTesting from_indicators( ) ..." << std::endl;

    MyDataFrame                 df;
    std::vector<unsigned long>  idxvec =
        { 1UL, 2UL, 3UL, 10UL, 5UL, 7UL, 8UL, 12UL, 9UL, 12UL, 10UL, 13UL, 10UL, 15UL, 14UL };
    std::vector<double>         dblvec =
        { 0.0, 15.0, 14.0, 0.0, 1.0, 14.0, 11.5, 11.5, 7.25, 7.25, 7.25, 14.0, 7.25, 15.0, 0.0};
    std::vector<double>         dblvec2 =
        { 100.0, 101.0, 102.0, 103.0, 104.0, 105.0, 106.55, 107.34, 1.8, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0};
    std::vector<int>            intvec = { 1, 2, 3, 4, 5, 8, 6, 7, 11, 14, 9 };
    std::vector<std::string>    strvec =
        { "blue", "blue", "red", "green", "black", "green", "white", "black",
          "black", "white", "red", "yellow", "green", "green", "green" };
    df.load_data(std::move(idxvec),
                 std::make_pair("levels", dblvec),
                 std::make_pair("dbl_col_2", dblvec2),
                 std::make_pair("colors", strvec));
    df.load_column("int_col", std::move(intvec), nan_policy::dont_pad_with_nans);

    df.load_indicators<std::string>("colors");
    df.load_indicators<std::string, bool>("colors", "bool_");
    df.load_indicators<double, double>("levels", "level_");

    auto  count =
        df.from_indicators({ "blue", "green", "white", "black", "red", "yellow" }, "colors_copy");

    assert(count == 15);
    assert((df.get_column<std::string>("colors") == df.get_column<std::string>("colors_copy")));

    count =
        df.from_indicators<double, double>({ "level_0", "level_15", "level_14", "level_1", "level_11.5", "level_7.25" },
                                           "levels_copy",
                                           "level_");
    assert(count == 15);
    assert((df.get_column<std::string>("levels") == df.get_column<std::string>("levels_copy")));
}
C++ DataFrame