Signature Description

// Selects which rows drop_missing() removes, based on missing column data.
enum class drop_policy : unsigned char  {
    all = 1,      // Remove row if all columns are nan
    any = 2,      // Remove row if any column is nan
    threshold = 3 // Remove row if threshold number of columns are nan
}; 
This policy specifies what rows to drop/remove based on missing column data
all: Drop the row if all columns are missing
any: Drop the row if any column is missing
threshold: Drop the row if at least the threshold number of columns are missing

Signature Description Parameters

// Removes rows whose columns are NaN, as selected by the drop policy.
// Ts:        the list of types for all columns; each type is given only once.
// policy:    which rows to remove (all / any / threshold).
// threshold: only used with drop_policy::threshold -- the number of NaN
//            columns a row must have before it is removed.
template<typename ... Ts>
void
drop_missing(drop_policy policy, std::size_t threshold = 0);
        
It removes a row if any, all, or some of the columns are NaN, based on the drop policy. Ts: The list of types for all columns. A type should be specified only once.
threshold: If the drop policy is threshold, it specifies the number of NaN columns a row must have before the row is removed.
static void test_drop_missing_all_no_drop()  {

    std::cout << "\nTesting drop_missing(all) no drop ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4,
                               std::numeric_limits<double>::quiet_NaN(),
                               6, 7,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9,
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               20, 22, 23, 30, 31,
                               std::numeric_limits<double>::quiet_NaN(),
                               1.89 };
    std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(),
                               16,
                               std::numeric_limits<double>::quiet_NaN(),
                               18, 19, 16,
                               std::numeric_limits<double>::quiet_NaN(),
                               0.34, 1.56, 0.34, 2.3, 0.34,
                               std::numeric_limits<double>::quiet_NaN() };
    std::vector<int>    i1 = { 22,
                               std::numeric_limits<int>::quiet_NaN(),
                               std::numeric_limits<int>::quiet_NaN(),
                               25,
                               std::numeric_limits<int>::quiet_NaN() };
    std::vector<std::string>    s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo", "pppp", "2222", "aaaa", "dddd" };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_str", s1),
                 std::make_pair("col_4", i1));

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);

    df.drop_missing<int, double, std::string>(drop_policy::all);

    std::cout << "After drop missing all DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}

// -----------------------------------------------------------------------------

static void test_drop_missing_all_2_drop()  {

    std::cout << "\nTesting drop_missing(all) 2 drop ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               7,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               12, 13, 14 };
    std::vector<double> d2 = { 8, 9,
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               20, 22, 23,
                               std::numeric_limits<double>::quiet_NaN(),
                               31,
                               std::numeric_limits<double>::quiet_NaN(),
                               1.89 };
    std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(),
                               16,
                               std::numeric_limits<double>::quiet_NaN(),
                               18, 19,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               0.34, 1.56, 0.34,
                               std::numeric_limits<double>::quiet_NaN(),
                               0.34,
                               std::numeric_limits<double>::quiet_NaN() };
    std::vector<int>    i1 = { 22,
                               std::numeric_limits<int>::quiet_NaN(),
                               std::numeric_limits<int>::quiet_NaN(),
                               25,
                               std::numeric_limits<int>::quiet_NaN() };
    std::vector<std::string>    s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "", "iiii", "oooo", "pppp", "2222", "", "dddd" };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_str", s1),
                 std::make_pair("col_4", i1));

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);

    df.drop_missing<int, double, std::string>(drop_policy::all);

    std::cout << "After drop missing all DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}

// -----------------------------------------------------------------------------

// Loads a 14-row frame with scattered missing values, prints it, applies
// drop_missing(drop_policy::any) and prints it again.
//
// col_3 (13 values), col_str (12 values) and col_4 (5 values) are
// deliberately shorter than the 14-entry index.
static void test_drop_missing_any()  {

    std::cout << "\nTesting drop_missing(any) ..." << std::endl;

    // Sentinels used to mark "missing" cells in the fixture.
    // NOTE(review): int has no NaN representation, so
    // numeric_limits<int>::quiet_NaN() is just 0 -- confirm this is the
    // intended "missing" marker for the int column.
    const double    dnan = std::numeric_limits<double>::quiet_NaN();
    const int       inan = std::numeric_limits<int>::quiet_NaN();

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4, dnan, 6, 7, dnan, dnan, dnan,
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9, dnan, 11, 12, dnan, dnan,
                               20, 22, 23, 30, 31, dnan, 1.89 };
    std::vector<double> d3 = { dnan, 16, dnan, 18, 19, 16, dnan,
                               0.34, 1.56, 0.34, 2.3, 0.34, dnan };
    std::vector<int>    i1 = { 22, inan, inan, 25, inan };
    std::vector<std::string>    s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo", "pppp", "2222", "aaaa", "dddd" };
    MyDataFrame         df;

    // The index is moved in; the column vectors are copied into the pairs.
    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_str", s1),
                 std::make_pair("col_4", i1));

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);

    df.drop_missing<int, double, std::string>(drop_policy::any);

    // The banner names the policy actually applied (it previously said "all").
    std::cout << "After drop missing any DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}

// -----------------------------------------------------------------------------

// Loads a 14-row frame with scattered missing values, prints it, applies
// drop_missing(drop_policy::threshold, 3) and prints it again.
//
// col_3 (13 values), col_str (12 values) and col_4 (5 values) are
// deliberately shorter than the 14-entry index.
// The misspelled function name is kept so existing callers still link.
static void test_drop_threashold_3()  {

    std::cout << "\nTesting drop_missing(threshold=3) ..." << std::endl;

    // Sentinels used to mark "missing" cells in the fixture.
    // NOTE(review): int has no NaN representation, so
    // numeric_limits<int>::quiet_NaN() is just 0 -- confirm this is the
    // intended "missing" marker for the int column.
    const double    dnan = std::numeric_limits<double>::quiet_NaN();
    const int       inan = std::numeric_limits<int>::quiet_NaN();

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4, dnan, 6, 7, dnan, dnan, dnan,
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9, dnan, 11, 12, dnan, dnan,
                               20, 22, 23, 30, 31, dnan, 1.89 };
    std::vector<double> d3 = { dnan, 16, dnan, 18, 19, 16, dnan,
                               0.34, 1.56, 0.34, 2.3, 0.34, dnan };
    std::vector<int>    i1 = { 22, inan, inan, 25, inan };
    std::vector<std::string>    s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo", "pppp", "2222", "aaaa", "dddd" };
    MyDataFrame         df;

    // The index is moved in; the column vectors are copied into the pairs.
    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_str", s1),
                 std::make_pair("col_4", i1));

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);

    df.drop_missing<int, double, std::string>(drop_policy::threshold, 3);

    // The banner names the policy actually applied (it previously said "all").
    std::cout << "After drop missing threshold=3 DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}
C++ DataFrame