Signature | Description |
---|---|
enum class drop_policy : unsigned char { all = 1, // Remove row if all columns are nan any = 2, // Remove row if any column is nan threshold = 3 // Remove row if threshold number of columns are nan }; |
This policy specifies which rows to drop/remove based on missing column data. all: Drop the row if all columns are missing. any: Drop the row if any column is missing. threshold: Drop the row if a threshold number of columns are missing. |
Signature | Description | Parameters |
---|---|---|
template<typename ... Ts> void drop_missing(drop_policy policy, std::size_t threshold = 0); |
It removes a row if any or all or some of the columns are NaN, based on drop policy |
Ts: The list of types for all columns. A type should be specified only once. threshold: If the drop policy is threshold, it specifies the number of NaN columns a row must have before it is removed. |
static void test_drop_missing_all_no_drop() { std::cout << "\nTesting drop_missing(all) no drop ..." << std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, std::numeric_limits<double>::quiet_NaN(), 6, 7, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 11, 12, 13, 14 }; std::vector<double> d2 = { 8, 9, std::numeric_limits<double>::quiet_NaN(), 11, 12, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 20, 22, 23, 30, 31, std::numeric_limits<double>::quiet_NaN(), 1.89 }; std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(), 16, std::numeric_limits<double>::quiet_NaN(), 18, 19, 16, std::numeric_limits<double>::quiet_NaN(), 0.34, 1.56, 0.34, 2.3, 0.34, std::numeric_limits<double>::quiet_NaN() }; std::vector<int> i1 = { 22, std::numeric_limits<int>::quiet_NaN(), std::numeric_limits<int>::quiet_NaN(), 25, std::numeric_limits<int>::quiet_NaN() }; std::vector<std::string> s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo", "pppp", "2222", "aaaa", "dddd" }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_str", s1), std::make_pair("col_4", i1)); std::cout << "Original DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); df.drop_missing<int, double, std::string>(drop_policy::all); std::cout << "After drop missing all DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); } // ----------------------------------------------------------------------------- static void test_drop_missing_all_2_drop() { std::cout << "\nTesting drop_missing(all) 2 drop ..." 
<< std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 7, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 12, 13, 14 }; std::vector<double> d2 = { 8, 9, std::numeric_limits<double>::quiet_NaN(), 11, 12, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 20, 22, 23, std::numeric_limits<double>::quiet_NaN(), 31, std::numeric_limits<double>::quiet_NaN(), 1.89 }; std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(), 16, std::numeric_limits<double>::quiet_NaN(), 18, 19, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 0.34, 1.56, 0.34, std::numeric_limits<double>::quiet_NaN(), 0.34, std::numeric_limits<double>::quiet_NaN() }; std::vector<int> i1 = { 22, std::numeric_limits<int>::quiet_NaN(), std::numeric_limits<int>::quiet_NaN(), 25, std::numeric_limits<int>::quiet_NaN() }; std::vector<std::string> s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "", "iiii", "oooo", "pppp", "2222", "", "dddd" }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_str", s1), std::make_pair("col_4", i1)); std::cout << "Original DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); df.drop_missing<int, double, std::string>(drop_policy::all); std::cout << "After drop missing all DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); } // ----------------------------------------------------------------------------- static void test_drop_missing_any() { std::cout << "\nTesting drop_missing(any) ..." 
<< std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, std::numeric_limits<double>::quiet_NaN(), 6, 7, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 11, 12, 13, 14 }; std::vector<double> d2 = { 8, 9, std::numeric_limits<double>::quiet_NaN(), 11, 12, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 20, 22, 23, 30, 31, std::numeric_limits<double>::quiet_NaN(), 1.89 }; std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(), 16, std::numeric_limits<double>::quiet_NaN(), 18, 19, 16, std::numeric_limits<double>::quiet_NaN(), 0.34, 1.56, 0.34, 2.3, 0.34, std::numeric_limits<double>::quiet_NaN() }; std::vector<int> i1 = { 22, std::numeric_limits<int>::quiet_NaN(), std::numeric_limits<int>::quiet_NaN(), 25, std::numeric_limits<int>::quiet_NaN() }; std::vector<std::string> s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo", "pppp", "2222", "aaaa", "dddd" }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_str", s1), std::make_pair("col_4", i1)); std::cout << "Original DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); df.drop_missing<int, double, std::string>(drop_policy::any); std::cout << "After drop missing all DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); } // ----------------------------------------------------------------------------- static void test_drop_threashold_3() { std::cout << "\nTesting drop_missing(threshold=3) ..." 
<< std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, std::numeric_limits<double>::quiet_NaN(), 6, 7, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 11, 12, 13, 14 }; std::vector<double> d2 = { 8, 9, std::numeric_limits<double>::quiet_NaN(), 11, 12, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 20, 22, 23, 30, 31, std::numeric_limits<double>::quiet_NaN(), 1.89 }; std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(), 16, std::numeric_limits<double>::quiet_NaN(), 18, 19, 16, std::numeric_limits<double>::quiet_NaN(), 0.34, 1.56, 0.34, 2.3, 0.34, std::numeric_limits<double>::quiet_NaN() }; std::vector<int> i1 = { 22, std::numeric_limits<int>::quiet_NaN(), std::numeric_limits<int>::quiet_NaN(), 25, std::numeric_limits<int>::quiet_NaN() }; std::vector<std::string> s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo", "pppp", "2222", "aaaa", "dddd" }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_str", s1), std::make_pair("col_4", i1)); std::cout << "Original DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); df.drop_missing<int, double, std::string>(drop_policy::threshold, 3); std::cout << "After drop missing all DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); }