Signature | Description |
---|---|
enum class join_policy : unsigned char { inner_join = 1, left_join = 2, right_join = 3, left_right_join = 4 /* This is merge */ }; |
Enumerated type to specify joining two DataFrames |
Signature | Description | Parameters |
---|---|---|
template<typename RHS_T, typename T, typename ... Ts> StdDataFrame<unsigned int> join_by_column(const RHS_T &rhs, const char *name, join_policy jp) const; |
It joins the data between self (lhs) and rhs and returns the joined data in a StdDataFrame, based on the specification in join_policy. The returned DataFrame is indexed by a sequence of unsigned integers from 0 to N. The returned DataFrame will have at least two columns, named lhs.INDEX and rhs.INDEX, containing the lhs and rhs indices based on the join policy. The following conditions must be met for this method to compile and work properly:
|
RHS_T: Type of DataFrame rhs<br>T: Type of the named column<br>Ts: List all the types of all data columns. A type should be specified in the list only once.<br>rhs: The rhs DataFrame<br>name: Name of the column which the join will be based on<br>jp: Specifies how to join. For example inner join, or left join, etc. (See join_policy definition) |
template<typename RHS_T, typename ... Ts> StdDataFrame<I> join_by_index(const RHS_T &rhs, join_policy jp) const; |
It joins the data between self (lhs) and rhs and returns the joined data in a StdDataFrame, based on specification in join_policy. The following conditions must be met for this method to compile and work properly:
|
RHS_T: Type of DataFrame rhs<br>Ts: List all the types of all data columns. A type should be specified in the list only once.<br>rhs: The rhs DataFrame<br>jp: Specifies how to join. For example inner join, or left join, etc. (See join_policy definition) |
static void test_index_inner_join() { std::cout << "\nTesting Index Inner Join ..." << std::endl; std::vector<unsigned long> idx = { 123456, 123451, 123452, 123453, 123454, 123455, 123450, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 7, 2, 3, 4, 5, 6, 1, 8, 9, 10, 11, 12, 13, 14 }; std::vector<double> d2 = { 14, 9, 10, 11, 12, 13, 8, 20, 22, 23, 30, 31, 32, 1.89}; std::vector<double> d3 = { 21, 16, 15, 18, 19, 16, 15, 0.34, 1.56, 0.34, 2.3, 0.34, 19.0 }; std::vector<int> i1 = { 22, 23, 24, 25, 99 }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", i1)); std::vector<unsigned long> idx2 = { 123452, 123453, 123455, 123458, 123466, 223450, 223451, 223454, 223456, 223457, 223459, 223460, 223462, 223461 }; std::vector<double> d12 = { 11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 112, 114, 113 }; std::vector<double> d22 = { 18, 19, 110, 111, 112, 113, 114, 120, 122, 123, 130, 131, 11.89, 132 }; std::vector<double> d32 = { 115, 116, 115, 118, 119, 116, 121, 10.34, 11.56, 10.34, 12.3, 119.0, 10.34 }; std::vector<int> i12 = { 122, 123, 124, 125, 199 }; MyDataFrame df2; df2.load_data(std::move(idx2), std::make_pair("xcol_1", d12), std::make_pair("col_2", d22), std::make_pair("xcol_3", d32), std::make_pair("col_4", i12)); std::cout << "First DF:" << std::endl; df.write<std::ostream, double, int>(std::cout); std::cout << "Second DF2:" << std::endl; df2.write<std::ostream, double, int>(std::cout); MyDataFrame join_df = df.join_by_index<decltype(df2), double, int>(df2, join_policy::inner_join); std::cout << "Now The joined DF:" << std::endl; join_df.write<std::ostream, double, int>(std::cout); } // ----------------------------------------------------------------------------- static void test_index_left_join() { std::cout << "\nTesting Index Left Join ..." 
<< std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; std::vector<double> d2 = { 8, 9, 10, 11, 12, 13, 14, 20, 22, 23, 30, 31, 32, 1.89}; std::vector<double> d3 = { 15, 16, 15, 18, 19, 16, 21, 0.34, 1.56, 0.34, 2.3, 0.34, 19.0 }; std::vector<int> i1 = { 22, 23, 24, 25, 99 }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", i1)); std::vector<unsigned long> idx2 = { 123452, 123453, 123455, 123458, 123466, 223450, 223451, 223454, 223456, 223457, 223459, 223460, 223461, 223462 }; std::vector<double> d12 = { 11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 112, 113, 114 }; std::vector<double> d22 = { 18, 19, 110, 111, 112, 113, 114, 120, 122, 123, 130, 131, 132, 11.89 }; std::vector<double> d32 = { 115, 116, 115, 118, 119, 116, 121, 10.34, 11.56, 10.34, 12.3, 10.34, 119.0 }; std::vector<int> i12 = { 122, 123, 124, 125, 199 }; MyDataFrame df2; df2.load_data(std::move(idx2), std::make_pair("xcol_1", d12), std::make_pair("col_2", d22), std::make_pair("xcol_3", d32), std::make_pair("col_4", i12)); std::cout << "First DF:" << std::endl; df.write<std::ostream, double, int>(std::cout); std::cout << "Second DF2:" << std::endl; df2.write<std::ostream, double, int>(std::cout); MyDataFrame join_df = df.join_by_index<decltype(df2), double, int>(df2, join_policy::left_join); std::cout << "Now The joined DF:" << std::endl; join_df.write<std::ostream, double, int>(std::cout); } // ----------------------------------------------------------------------------- static void test_index_right_join() { std::cout << "\nTesting Index Right Join ..." 
<< std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; std::vector<double> d2 = { 8, 9, 10, 11, 12, 13, 14, 20, 22, 23, 30, 31, 32, 1.89}; std::vector<double> d3 = { 15, 16, 15, 18, 19, 16, 21, 0.34, 1.56, 0.34, 2.3, 0.34, 19.0 }; std::vector<int> i1 = { 22, 23, 24, 25, 99 }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", i1)); std::vector<unsigned long> idx2 = { 123452, 123453, 123455, 123458, 123466, 223450, 223451, 223454, 223456, 223457, 223459, 223460, 223461, 223462 }; std::vector<double> d12 = { 11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 112, 113, 114 }; std::vector<double> d22 = { 18, 19, 110, 111, 112, 113, 114, 120, 122, 123, 130, 131, 132, 11.89 }; std::vector<double> d32 = { 115, 116, 115, 118, 119, 116, 121, 10.34, 11.56, 10.34, 12.3, 10.34, 119.0 }; std::vector<int> i12 = { 122, 123, 124, 125, 199 }; MyDataFrame df2; df2.load_data(std::move(idx2), std::make_pair("xcol_1", d12), std::make_pair("col_2", d22), std::make_pair("xcol_3", d32), std::make_pair("col_4", i12)); std::cout << "First DF:" << std::endl; df.write<std::ostream, double, int>(std::cout); std::cout << "Second DF2:" << std::endl; df2.write<std::ostream, double, int>(std::cout); MyDataFrame join_df = df.join_by_index<decltype(df2), double, int>(df2, join_policy::right_join); std::cout << "Now The joined DF:" << std::endl; join_df.write<std::ostream, double, int>(std::cout); } // ----------------------------------------------------------------------------- static void test_index_left_right_join() { std::cout << "\nTesting Index Left Right Join ..." 
<< std::endl; std::vector<unsigned long> idx = { 123466, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123450 }; std::vector<double> d1 = { 14, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1 }; std::vector<double> d2 = { 1.89, 9, 10, 11, 12, 13, 14, 20, 22, 23, 30, 31, 32, 8 }; std::vector<double> d3 = { 19.0, 16, 15, 18, 19, 16, 21, 0.34, 1.56, 0.34, 2.3, 0.34, 15.0 }; std::vector<int> i1 = { 22, 23, 24, 25, 99 }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", i1)); std::vector<unsigned long> idx2 = { 123452, 123453, 123455, 123458, 123466, 223450, 223451, 223454, 223456, 223457, 223459, 223461, 223460, 223462 }; std::vector<double> d12 = { 11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 113, 112, 114 }; std::vector<double> d22 = { 18, 19, 110, 111, 112, 113, 114, 120, 122, 123, 130, 132, 131, 11.89 }; std::vector<double> d32 = { 115, 116, 115, 118, 119, 116, 121, 10.34, 11.56, 10.34, 10.34, 12.3, 119.0 }; std::vector<int> i12 = { 122, 123, 124, 125, 199 }; MyDataFrame df2; df2.load_data(std::move(idx2), std::make_pair("xcol_1", d12), std::make_pair("col_2", d22), std::make_pair("xcol_3", d32), std::make_pair("col_4", i12)); std::cout << "First DF:" << std::endl; df.write<std::ostream, double, int>(std::cout); std::cout << "Second DF2:" << std::endl; df2.write<std::ostream, double, int>(std::cout); MyDataFrame join_df = df.join_by_index<decltype(df2), double, int>(df2, join_policy::left_right_join); std::cout << "Now The joined DF:" << std::endl; join_df.write<std::ostream, double, int>(std::cout); } // ----------------------------------------------------------------------------- static void test_join_by_column() { std::cout << "\nTesting join by column ..." 
<< std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; std::vector<double> d2 = { 8, 9, 10, 11, 12, 13, 14, 20, 22, 23, 30, 31, 32, 1.89}; std::vector<double> d3 = { 15, 16, 15, 18, 19, 16, 21, 0.34, 1.56, 0.34, 2.3, 0.34, 19.0 }; std::vector<int> i1 = { 22, 23, 24, 25, 99 }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", i1)); std::vector<unsigned long> idx2 = { 123452, 123453, 123455, 123458, 123466, 223450, 223451, 223454, 223456, 223457, 223459, 223460, 223461, 223462 }; std::vector<double> d12 = { 11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 112, 113, 114 }; std::vector<double> d22 = { 8, 19, 110, 111, 9, 113, 114, 99, 122, 123, 130, 131, 20, 11.89 }; std::vector<double> d32 = { 115, 116, 115, 118, 119, 116, 121, 10.34, 11.56, 10.34, 12.3, 10.34, 119.0 }; std::vector<int> i12 = { 122, 123, 124, 125, 199 }; MyDataFrame df2; df2.load_data(std::move(idx2), std::make_pair("xcol_1", d12), std::make_pair("col_2", d22), std::make_pair("xcol_3", d32), std::make_pair("col_4", i12)); StdDataFrame<unsigned int> inner_result = df.join_by_column<decltype(df2), double, double, int>(df2, "col_2", join_policy::inner_join); assert(inner_result.get_index().size() == 3); assert(inner_result.get_column<double>("xcol_1")[2] == 113.0); assert(inner_result.get_column<double>("xcol_3")[1] == 119.0); assert(inner_result.get_column<double>("col_1")[2] == 8.0); assert(inner_result.get_column<double>("col_3")[0] == 15.0); assert(inner_result.get_column<int>("rhs.col_4")[2] == 0); assert(inner_result.get_column<int>("lhs.col_4")[0] == 22); assert(inner_result.get_column<unsigned long>("rhs.INDEX")[1] == 123466); assert(inner_result.get_column<unsigned long>("lhs.INDEX")[2] == 123457); 
StdDataFrame<unsigned int> left_result = df.join_by_column<decltype(df2), double, double, int>(df2, "col_2", join_policy::left_join); assert(left_result.get_index().size() == 14); assert(std::isnan(left_result.get_column<double>("xcol_1")[5])); assert(left_result.get_column<double>("xcol_3")[8] == 119.0); assert(left_result.get_column<double>("col_1")[13] == 13.0); assert(left_result.get_column<double>("col_3")[9] == 1.56); assert(left_result.get_column<int>("rhs.col_4")[2] == 199); assert(left_result.get_column<int>("lhs.col_4")[5] == 99); assert(left_result.get_column<unsigned long>("rhs.INDEX")[3] == 0); assert(left_result.get_column<unsigned long>("lhs.INDEX")[11] == 123460); StdDataFrame<unsigned int> right_result = df.join_by_column<decltype(df2), double, double, int>(df2, "col_2", join_policy::right_join); assert(right_result.get_index().size() == 14); assert(right_result.get_column<double>("xcol_1")[5] == 18.0); assert(std::isnan(right_result.get_column<double>("xcol_3")[2])); assert(right_result.get_column<double>("col_1")[4] == 8.0); assert(std::isnan(right_result.get_column<double>("col_3")[5])); assert(right_result.get_column<int>("rhs.col_4")[2] == 0); assert(right_result.get_column<int>("lhs.col_4")[5] == 0); assert(right_result.get_column<unsigned long>("rhs.INDEX")[3] == 123453); assert(right_result.get_column<unsigned long>("lhs.INDEX")[11] == 0); StdDataFrame<unsigned int> left_right_result = df.join_by_column<decltype(df2), double, double, int>(df2, "col_2", join_policy::left_right_join); assert(left_right_result.get_index().size() == 25); assert(left_right_result.get_column<double>("xcol_1")[2] == 15.0); assert(left_right_result.get_column<double>("xcol_3")[1] == 115.0); assert(left_right_result.get_column<double>("col_1")[2] == 2.0); assert(std::isnan(left_right_result.get_column<double>("col_3")[0])); assert(left_right_result.get_column<int>("rhs.col_4")[2] == 199); assert(left_right_result.get_column<int>("lhs.col_4")[0] == 0); 
assert(left_right_result.get_column<unsigned long>("rhs.INDEX")[1] == 123452); assert(left_right_result.get_column<unsigned long>("lhs.INDEX")[2] == 123451); }