Signature | Description |
---|---|
enum class concat_policy : unsigned char { // Only apply concatenation to the common columns // common_columns = 1, // Concatenate all columns. If a column does not exist in self, // create one in the result and prepend with nan // all_columns = 2, // The result will have all the columns in self, // but only common columns and index are concatenated // lhs_and_common_columns = 3, }; |
Enumerated type to specify joining/concatenating one DataFrame to the end of another. |
Signature | Description | Parameters |
---|---|---|
template<typename RHS_T, typename ... Ts> DataFrame<I, H> concat(const RHS_T &rhs, concat_policy cp = concat_policy::all_columns) const; |
It concatenates rhs to the end of self and returns the result as another DataFrame. Concatenation is done according to the given policy. |
RHS_T: Type of DataFrame rhs Ts: List all the types of all data columns. A type should be specified in the list only once. rhs: The rhs DataFrame cp: Concatenation policy. See above |
template<typename RHS_T, typename ... Ts> PtrView concat_view( RHS_T &rhs, concat_policy cp = concat_policy::common_columns); |
This behaves just like concat(), but returns a view instead of another DataFrame. |
RHS_T: Type of DataFrame rhs Ts: List all the types of all data columns. A type should be specified in the list only once. rhs: The rhs DataFrame cp: Concatenation policy. See above |
template<typename RHS_T, typename ... Ts> ConstPtrView concat_view( RHS_T &rhs, concat_policy cp = concat_policy::common_columns) const; |
Same as the view above, but it returns a const view. You cannot change data in const views. But if the data is changed in the original DataFrame or through another view, it is reflected in the const view. |
RHS_T: Type of DataFrame rhs Ts: List all the types of all data columns. A type should be specified in the list only once. rhs: The rhs DataFrame cp: Concatenation policy. See above |
template<typename RHS_T, typename ... Ts> void self_concat(const RHS_T &rhs, bool add_new_columns = true); |
This is similar to concat() method but it is applied to self. It changes self. |
RHS_T: Type of DataFrame rhs Ts: List all the types of all data columns. A type should be specified in the list only once. rhs: The rhs DataFrame add_new_columns: If true, it creates new columns in self and prepends them with NaN |
static void test_self_concat() { std::cout << "\nTesting self_concat( ) ..." << std::endl; MyDataFrame df1; std::vector<unsigned long> idxvec = { 1UL, 2UL, 3UL, 10UL, 5UL, 7UL, 8UL, 12UL, 9UL, 12UL, 10UL, 13UL, 10UL, 15UL, 14UL }; std::vector<double> dblvec = { 0.0, 15.0, 14.0, 2.0, 1.0, 12.0, 11.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 9.0, 10.0 }; std::vector<double> dblvec2 = { 100.0, 101.0, 102.0, 103.0, 104.0, 105.0, 106.55, 107.34, 1.8, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0 }; std::vector<int> intvec = { 1, 2, 3, 4, 5, 8, 6, 7, 11, 14, 9, 10, 15, 12, 13 }; std::vector<std::string> strvec = { "zz", "bb", "cc", "ww", "ee", "ff", "gg", "hh", "ii", "jj", "kk", "ll", "mm", "nn", "oo" }; df1.load_data(std::move(idxvec), std::make_pair("dbl_col", dblvec), std::make_pair("int_col", intvec), std::make_pair("str_col", strvec)); MyDataFrame df2 = df1; df2.load_column("dbl_col_2", std::move(dblvec2)); df1.self_concat<decltype(df2), double, int, std::string>(df2, true); assert(df1.get_index().size() == 30); assert(df1.get_column<double>("dbl_col_2").size() == 30); assert(df1.get_column<double>("dbl_col").size() == 30); assert(df1.get_column<std::string>("str_col").size() == 30); assert(df1.get_column<int>("int_col").size() == 30); assert(df1.get_index()[0] == 1); assert(df1.get_index()[14] == 14); assert(df1.get_index()[15] == 1); assert(df1.get_index()[29] == 14); assert(std::isnan(df1.get_column<double>("dbl_col_2")[0])); assert(std::isnan(df1.get_column<double>("dbl_col_2")[14])); assert(df1.get_column<double>("dbl_col_2")[15] == 100.0); assert(df1.get_column<double>("dbl_col_2")[29] == 116.0); assert(df1.get_column<std::string>("str_col")[0] == "zz"); assert(df1.get_column<std::string>("str_col")[14] == "oo"); assert(df1.get_column<std::string>("str_col")[15] == "zz"); assert(df1.get_column<int>("int_col")[0] == 1); assert(df1.get_column<int>("int_col")[14] == 13); assert(df1.get_column<int>("int_col")[15] == 1); } // 
----------------------------------------------------------------------------- static void test_concat() { std::cout << "\nTesting concat( ) ..." << std::endl; MyDataFrame df1; std::vector<unsigned long> idxvec = { 1UL, 2UL, 3UL, 10UL, 5UL, 7UL, 8UL, 12UL, 9UL, 12UL, 10UL, 13UL, 10UL, 15UL, 14UL }; std::vector<double> dblvec = { 0.0, 15.0, 14.0, 2.0, 1.0, 12.0, 11.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 9.0, 10.0 }; std::vector<double> dblvec2 = { 100.0, 101.0, 102.0, 103.0, 104.0, 105.0, 106.55, 107.34, 1.8, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0 }; std::vector<int> intvec = { 1, 2, 3, 4, 5, 8, 6, 7, 11, 14, 9, 10, 15, 12, 13 }; std::vector<std::string> strvec = { "zz", "bb", "cc", "ww", "ee", "ff", "gg", "hh", "ii", "jj", "kk", "ll", "mm", "nn", "oo" }; df1.load_data(std::move(idxvec), std::make_pair("dbl_col", dblvec), std::make_pair("int_col", intvec), std::make_pair("str_col", strvec)); MyDataFrame df2 = df1; df2.load_column("dbl_col_2", std::move(dblvec2)); auto result1 = df1.concat<decltype(df2), double, int, std::string>(df2); assert(result1.get_index().size() == 30); assert(result1.get_column<double>("dbl_col_2").size() == 30); assert(result1.get_column<double>("dbl_col").size() == 30); assert(result1.get_column<std::string>("str_col").size() == 30); assert(result1.get_column<int>("int_col").size() == 30); assert(result1.get_index()[0] == 1); assert(result1.get_index()[14] == 14); assert(result1.get_index()[15] == 1); assert(result1.get_index()[29] == 14); assert(std::isnan(result1.get_column<double>("dbl_col_2")[0])); assert(std::isnan(result1.get_column<double>("dbl_col_2")[14])); assert(result1.get_column<double>("dbl_col_2")[15] == 100.0); assert(result1.get_column<double>("dbl_col_2")[29] == 116.0); assert(result1.get_column<std::string>("str_col")[0] == "zz"); assert(result1.get_column<std::string>("str_col")[14] == "oo"); assert(result1.get_column<std::string>("str_col")[15] == "zz"); assert(result1.get_column<int>("int_col")[0] == 1); 
assert(result1.get_column<int>("int_col")[14] == 13); assert(result1.get_column<int>("int_col")[15] == 1); auto result2 = df1.concat<decltype(df2), double, int, std::string>(df2, concat_policy::common_columns); assert(result2.get_index().size() == 30); assert(result2.get_column<double>("dbl_col").size() == 30); assert(result2.get_column<std::string>("str_col").size() == 30); assert(result2.get_column<std::string>("str_col")[0] == "zz"); assert(result2.get_column<std::string>("str_col")[14] == "oo"); assert(result2.get_column<std::string>("str_col")[15] == "zz"); assert(! result2.has_column("dbl_col_2")); auto result3 = df1.concat<decltype(df2), double, int, std::string>(df2, concat_policy::lhs_and_common_columns); assert((result2.is_equal<int, double, std::string>(result3))); } // ---------------------------------------------------------------------------- static void test_concat_view() { std::cout << "\nTesting concat_view( ) ..." << std::endl; MyDataFrame df1; std::vector<unsigned long> idxvec = { 1UL, 2UL, 3UL, 10UL, 5UL, 7UL, 8UL, 12UL, 9UL, 12UL, 10UL, 13UL, 10UL, 15UL, 14UL }; std::vector<double> dblvec = { 0.0, 15.0, 14.0, 2.0, 1.0, 12.0, 11.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 9.0, 10.0 }; std::vector<double> dblvec2 = { 100.0, 101.0, 102.0, 103.0, 104.0, 105.0, 106.55, 107.34, 1.8, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0 }; std::vector<int> intvec = { 1, 2, 3, 4, 5, 8, 6, 7, 11, 14, 9, 10, 15, 12, 13 }; std::vector<std::string> strvec = { "zz", "bb", "cc", "ww", "ee", "ff", "gg", "hh", "ii", "jj", "kk", "ll", "mm", "nn", "oo" }; df1.load_data(std::move(idxvec), std::make_pair("dbl_col", dblvec), std::make_pair("int_col", intvec), std::make_pair("str_col", strvec)); MyDataFrame df2 = df1; const MyDataFrame &const_df1 = df1; const MyDataFrame &const_df2 = df2; df2.load_column("dbl_col_2", std::move(dblvec2)); auto result1 = df1.concat_view<decltype(df2), double, int, std::string>(df2); auto const_result1 = const_df1.concat_view<decltype(df2), double, int, 
std::string>(df2); assert(result1.get_index().size() == 30); assert(result1.get_column<double>("dbl_col").size() == 30); assert(result1.get_column<std::string>("str_col").size() == 30); assert(result1.get_column<int>("int_col").size() == 30); assert(result1.get_index()[0] == 1); assert(result1.get_index()[14] == 14); assert(result1.get_index()[15] == 1); assert(result1.get_index()[29] == 14); assert(result1.get_column<std::string>("str_col")[0] == "zz"); assert(result1.get_column<std::string>("str_col")[14] == "oo"); assert(result1.get_column<std::string>("str_col")[15] == "zz"); assert(result1.get_column<int>("int_col")[0] == 1); assert(result1.get_column<int>("int_col")[14] == 13); assert(result1.get_column<int>("int_col")[15] == 1); assert(const_result1.get_index().size() == 30); assert(const_result1.get_column<double>("dbl_col").size() == 30); assert(const_result1.get_column<std::string>("str_col").size() == 30); assert(const_result1.get_column<int>("int_col").size() == 30); assert(const_result1.get_index()[0] == 1); assert(const_result1.get_index()[14] == 14); assert(const_result1.get_index()[15] == 1); assert(const_result1.get_index()[29] == 14); assert(const_result1.get_column<std::string>("str_col")[0] == "zz"); assert(const_result1.get_column<std::string>("str_col")[14] == "oo"); assert(const_result1.get_column<std::string>("str_col")[15] == "zz"); assert(const_result1.get_column<int>("int_col")[0] == 1); assert(const_result1.get_column<int>("int_col")[14] == 13); assert(const_result1.get_column<int>("int_col")[15] == 1); auto result2 = df2.concat_view<decltype(df2), double, int, std::string>(df1, concat_policy::lhs_and_common_columns); auto const_result2 = const_df2.concat_view<decltype(df2), double, int, std::string>(df1, concat_policy::lhs_and_common_columns); assert(result2.get_index().size() == 30); assert(result2.get_column<double>("dbl_col").size() == 30); assert(result2.get_column<std::string>("str_col").size() == 30); 
assert(result2.get_column<std::string>("str_col")[0] == "zz"); assert(result2.get_column<std::string>("str_col")[14] == "oo"); assert(result2.get_column<std::string>("str_col")[15] == "zz"); assert(result2.get_column<double>("dbl_col_2").size() == 15); assert(result2.get_column<double>("dbl_col_2")[0] == 100.0); assert(result2.get_column<double>("dbl_col_2")[5] == 105.0); assert(result2.get_column<double>("dbl_col_2")[10] == 112.0); assert(const_result2.get_index().size() == 30); assert(const_result2.get_column<double>("dbl_col").size() == 30); assert(const_result2.get_column<std::string>("str_col").size() == 30); assert(const_result2.get_column<std::string>("str_col")[0] == "zz"); assert(const_result2.get_column<std::string>("str_col")[14] == "oo"); assert(const_result2.get_column<std::string>("str_col")[15] == "zz"); assert(const_result2.get_column<double>("dbl_col_2").size() == 15); assert(const_result2.get_column<double>("dbl_col_2")[0] == 100.0); assert(const_result2.get_column<double>("dbl_col_2")[5] == 105.0); assert(const_result2.get_column<double>("dbl_col_2")[10] == 112.0); auto result3 = df1.concat_view<decltype(df1), double, int, std::string>(df2, concat_policy::all_columns); auto const_result3 = const_df1.concat_view<decltype(df1), double, int, std::string>(df2, concat_policy::all_columns); assert(result3.get_index().size() == 30); assert(result3.get_column<double>("dbl_col").size() == 30); assert(result3.get_column<std::string>("str_col").size() == 30); assert(result3.get_column<double>("dbl_col_2").size() == 15); assert(result3.get_column<std::string>("str_col")[0] == "zz"); assert(result3.get_column<std::string>("str_col")[14] == "oo"); assert(result3.get_column<std::string>("str_col")[15] == "zz"); assert(result3.get_column<double>("dbl_col_2").size() == 15); assert(result3.get_column<double>("dbl_col_2")[0] == 100.0); assert(result3.get_column<double>("dbl_col_2")[5] == 105.0); assert(result3.get_column<double>("dbl_col_2")[10] == 112.0); 
assert(const_result3.get_index().size() == 30); assert(const_result3.get_column<double>("dbl_col").size() == 30); assert(const_result3.get_column<std::string>("str_col").size() == 30); assert(const_result3.get_column<double>("dbl_col_2").size() == 15); assert(const_result3.get_column<std::string>("str_col")[0] == "zz"); assert(const_result3.get_column<std::string>("str_col")[14] == "oo"); assert(const_result3.get_column<std::string>("str_col")[15] == "zz"); assert(const_result3.get_column<double>("dbl_col_2").size() == 15); assert(const_result3.get_column<double>("dbl_col_2")[0] == 100.0); assert(const_result3.get_column<double>("dbl_col_2")[5] == 105.0); assert(const_result3.get_column<double>("dbl_col_2")[10] == 112.0); }