Signature | Description | Parameters |
---|---|---|
#include <DataFrame/DataFrameMLVisitors.h> template<typename T, typename I = unsigned long, std::size_t A = 0> struct AffinityPropVisitor; |
This is a “single action visitor”, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface. This functor class finds clusters in data using the Affinity Propagation algorithm. The constructor takes three parameters: 1. Number of iterations 2. A function to calculate the distance between two data points of type T, with a default value 3. Damping factor used in the algorithm. The default is 0.9. (1 – damping factor) prevents numerical oscillations. explicit AffinityPropVisitor(std::size_t num_of_iter, distance_func f = [](const T &x, const T &y) -> double { return ((x - y) * (x - y)); }, double damping_factor = 0.9); The result type is VectorPtrView of type T containing the centers of clusters. There is also a get_clusters() method that returns a vector of VectorPtrView’s which contain the data clustered around the centers. |
T: Column data type I: Index type A: Memory alignment boundary for vectors. Default is system default alignment |
static void test_affinity_propagation() { std::cout << "\nTesting affinity propagation visitor ..." << std::endl; const size_t item_cnt = 50; MyDataFrame df; RandGenParams<double> p; std::vector<double> final_col; std::vector<double> col_data; p.seed = 3575984165U; p.min_value = 0; p.max_value = 10; col_data = gen_uniform_real_dist<double>(item_cnt, p); final_col.insert(final_col.end(), col_data.begin(), col_data.end()); p.min_value = 20; p.max_value = 30; col_data = gen_uniform_real_dist<double>(item_cnt, p); final_col.insert(final_col.end(), col_data.begin(), col_data.end()); p.min_value = 40; p.max_value = 50; col_data = gen_uniform_real_dist<double>(item_cnt, p); final_col.insert(final_col.end(), col_data.begin(), col_data.end()); p.min_value = 60; p.max_value = 70; col_data = gen_uniform_real_dist<double>(item_cnt, p); final_col.insert(final_col.end(), col_data.begin(), col_data.end()); p.min_value = 80; p.max_value = 90; col_data = gen_uniform_real_dist<double>(item_cnt, p); final_col.insert(final_col.end(), col_data.begin(), col_data.end()); df.load_data(MyDataFrame::gen_sequence_index(0, item_cnt * 5, 1), std::make_pair("col1", final_col)); df.shuffle<1, double>({"col1"}, false); KMeansVisitor<5, double> km_visitor(1000); AffinityPropVisitor<double> ap_visitor(50); df.single_act_visit<double>("col1", km_visitor); df.single_act_visit<double>("col1", ap_visitor); // Using the calculated means, separate the given column into clusters const auto k_means = km_visitor.get_result(); const auto results = ap_visitor.get_clusters(df.get_index(), df.get_column<double>("col1")); for (auto iter : k_means) { std::cout << iter << ", "; } std::cout << "\n\n" << std::endl; for (auto iter : results) { for (auto iter2 : iter) { std::cout << iter2 << ", "; } std::cout << "\n" << std::endl; } }