Signature | Description | Parameters |
---|---|---|
template<typename ... Ts> DataFrame get_data_by_loc(Index2D<long> range) const; |
It returns a DataFrame (including the index and data columns) containing the data from location begin to location end. This function supports Python-like negative indexing. That is why the range type is long. |
Ts: The list of types for all columns. A type should be specified only once. range: The begin and end locations of the data |
template<typename ... Ts> DataFrame get_data_by_loc(const std::vector<long> &locations) const; |
It returns a DataFrame (including the index and data columns) containing the data from the locations specified in the locations vector. This function supports Python-like negative indexing. That is why the locations vector type is long. NOTE: The negative indexing is relative to the "index" column, which may not be the same size as all other columns. The returned DataFrame is in the same order as the locations parameter. |
Ts: The list of types for all columns. A type should be specified only once. locations: List of indices into the index column to copy data |
template<typename ... Ts> DataFrameView<I> get_view_by_loc(Index2D<long> range) const; |
It behaves like get_data_by_loc(), but it returns a DataFrameView. A view is a DataFrame that is a reference to the original DataFrame. So, if you modify anything in the view, the original DataFrame will also be modified. NOTE: There are certain operations that you cannot do with a view. For example, you cannot add/delete columns, etc. NOTE: Although this is a const method, it returns a view. So, the data could still be modified through the returned view. |
Ts: The list of types for all columns. A type should be specified only once. range: The begin and end locations of the data |
template<typename ... Ts> DataFramePtrView<I> get_view_by_loc(const std::vector<long> &locations) const; |
It behaves like get_data_by_loc(locations), but it returns a DataFramePtrView. A view is a DataFrame that is a reference to the original DataFrame. So, if you modify anything in the view, the original DataFrame will also be modified. NOTE: There are certain operations that you cannot do with a view. For example, you cannot add/delete columns, etc. NOTE: Although this is a const method, it returns a view. So, the data could still be modified through the returned view. |
Ts: The list of types for all columns. A type should be specified only once. locations: List of indices into the index column to copy data |
static void test_get_data_by_loc_slicing() { std::cout << "\nTesting get_data_by_loc()/slicing ..." << std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123450, 123455, 123450, 123449 }; std::vector<double> d1 = { 1, 2, 3, 4, 5, 6, 7 }; std::vector<double> d2 = { 8, 9, 10, 11, 12, 13, 14 }; std::vector<double> d3 = { 15, 16, 17, 18, 19, 20, 21 }; std::vector<double> d4 = { 22, 23, 24, 25 }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", d4)); MyDataFrame df2 = df.get_data_by_loc<double>(Index2D<long> { 3, 6 }); MyDataFrame df3 = df.get_data_by_loc<double>(Index2D<long> { 0, 7 }); MyDataFrame df4 = df.get_data_by_loc<double>(Index2D<long> { -4, -1 }); MyDataFrame df5 = df.get_data_by_loc<double>(Index2D<long> { -4, 6 }); df.write<std::ostream, double>(std::cout); df2.write<std::ostream, double>(std::cout); df3.write<std::ostream, double>(std::cout); df4.write<std::ostream, double>(std::cout); df5.write<std::ostream, double>(std::cout); try { MyDataFrame df2 = df.get_data_by_loc<double>(Index2D<long> { 3, 8 }); } catch (const BadRange &ex) { std::cout << "Caught: " << ex.what() << std::endl; } try { MyDataFrame df2 = df.get_data_by_loc<double>(Index2D<long> { -8, -1 }); } catch (const BadRange &ex) { std::cout << "Caught: " << ex.what() << std::endl; } } // ----------------------------------------------------------------------------- static void test_get_view_by_loc() { std::cout << "\nTesting get_view_by_loc() ..." 
<< std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123450, 123455, 123450, 123449 }; std::vector<double> d1 = { 1, 2, 3, 4, 5, 6, 7 }; std::vector<double> d2 = { 8, 9, 10, 11, 12, 13, 14 }; std::vector<double> d3 = { 15, 16, 17, 18, 19, 20, 21 }; std::vector<double> d4 = { 22, 23, 24, 25 }; std::vector<std::string> s1 = { "11", "22", "33", "xx", "yy", "gg", "string" }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", d4), std::make_pair("col_str", s1)); auto memory_use1 = df.get_memory_usage<double>("col_3"); std::cout << "DataFrame Memory Usage:\n" << memory_use1 << std::endl; typedef DataFrameView<unsigned long> MyDataFrameView; MyDataFrameView dfv = df.get_view_by_loc<double, std::string>(Index2D<long> { 3, 6 }); dfv.shrink_to_fit<double, std::string>(); dfv.write<std::ostream, double, std::string>(std::cout); dfv.get_column<double>("col_3")[0] = 88.0; assert(dfv.get_column<double>("col_3")[0] == df.get_column<double>("col_3")[3]); assert(dfv.get_column<double>("col_3")[0] == 88.0); auto memory_use2 = dfv.get_memory_usage<double>("col_3"); std::cout << "View Memory Usage:\n" << memory_use2 << std::endl; } // ----------------------------------------------------------------------------- static void test_get_data_by_loc_location() { std::cout << "\nTesting get_data_by_loc(locations) ..." 
<< std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123450, 123455, 123450, 123449 }; std::vector<double> d1 = { 1, 2, 3, 4, 5, 6, 7 }; std::vector<double> d2 = { 8, 9, 10, 11, 12, 13, 14 }; std::vector<double> d3 = { 15, 16, 17, 18, 19, 20, 21 }; std::vector<double> d4 = { 22, 23, 24, 25 }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", d4)); MyDataFrame df2 = df.get_data_by_loc<double>(std::vector<long> { 3, 6 }); MyDataFrame df3 = df.get_data_by_loc<double>(std::vector<long> { -4, -1 , 5}); assert(df2.get_index().size() == 2); assert(df2.get_column<double>("col_3").size() == 2); assert(df2.get_column<double>("col_2").size() == 2); assert(df2.get_index()[0] == 123450); assert(df2.get_index()[1] == 123449); assert(df2.get_column<double>("col_3")[0] == 18.0); assert(df2.get_column<double>("col_2")[1] == 14.0); assert(std::isnan(df2.get_column<double>("col_4")[1])); assert(df3.get_index().size() == 3); assert(df3.get_column<double>("col_3").size() == 3); assert(df3.get_column<double>("col_2").size() == 3); assert(df3.get_column<double>("col_1").size() == 3); assert(df3.get_index()[0] == 123450); assert(df3.get_index()[1] == 123449); assert(df3.get_index()[2] == 123450); assert(df3.get_column<double>("col_1")[0] == 4.0); assert(df3.get_column<double>("col_2")[2] == 13.0); assert(df3.get_column<double>("col_4")[0] == 25.0); assert(std::isnan(df3.get_column<double>("col_4")[1])); assert(std::isnan(df3.get_column<double>("col_4")[2])); } // ----------------------------------------------------------------------------- static void test_get_view_by_loc_location() { std::cout << "\nTesting get_view_by_loc(locations) ..." 
<< std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123450, 123455, 123450, 123449 }; std::vector<double> d1 = { 1, 2, 3, 4, 5, 6, 7 }; std::vector<double> d2 = { 8, 9, 10, 11, 12, 13, 14 }; std::vector<double> d3 = { 15, 16, 17, 18, 19, 20, 21 }; std::vector<double> d4 = { 22, 23, 24, 25 }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", d4)); auto dfv1 = df.get_view_by_loc<double>(std::vector<long> { 3, 6 }); auto dfv2 = df.get_view_by_loc<double>(std::vector<long> { -4, -1 , 5}); assert(dfv1.get_index().size() == 2); assert(dfv1.get_column<double>("col_3").size() == 2); assert(dfv1.get_column<double>("col_2").size() == 2); assert(dfv1.get_index()[0] == 123450); assert(dfv1.get_index()[1] == 123449); assert(dfv1.get_column<double>("col_3")[0] == 18.0); assert(dfv1.get_column<double>("col_2")[1] == 14.0); assert(std::isnan(dfv1.get_column<double>("col_4")[1])); assert(dfv2.get_index().size() == 3); assert(dfv2.get_column<double>("col_3").size() == 3); assert(dfv2.get_column<double>("col_2").size() == 3); assert(dfv2.get_column<double>("col_1").size() == 3); assert(dfv2.get_index()[0] == 123450); assert(dfv2.get_index()[1] == 123449); assert(dfv2.get_index()[2] == 123450); assert(dfv2.get_column<double>("col_1")[0] == 4.0); assert(dfv2.get_column<double>("col_2")[2] == 13.0); assert(dfv2.get_column<double>("col_4")[0] == 25.0); assert(std::isnan(dfv2.get_column<double>("col_4")[1])); assert(std::isnan(dfv2.get_column<double>("col_4")[2])); dfv2.get_column<double>("col_1")[0] = 101.0; assert(dfv2.get_column<double>("col_1")[0] == 101.0); assert(df.get_column<double>("col_1")[3] == 101.0); auto memory_use = dfv2.get_memory_usage<double>("col_3"); std::cout << "View Memory Usage:\n" << memory_use << std::endl; }