Signature | Description | Parameters |
---|---|---|
`#include <DataFrame/DataFrameMLVisitors.h>`<br><br>`template<typename T, typename I = unsigned long, std::size_t A = 0>`<br>`struct PolicyLearningLossVisitor;`<br><br>`template<typename T, typename I = unsigned long, std::size_t A = 0>`<br>`using plloss_v = PolicyLearningLossVisitor<T, I, A>;` | This is a “single action visitor”, meaning it is passed the whole data vector in one call, so you must use the `single_act_visit()` interface.<br><br>This functor calculates the loss function of policy learning. It requires two columns: action probability and reward. The formula is:<br>`L = -log(P(action \| state)) * reward`<br><br>A short stand-alone sketch of the per-row calculation follows the table. | T: Column data type.<br>I: Index type.<br>A: Memory alignment boundary for vectors. Default is system default alignment. |
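The formula is easy to verify by hand. The sketch below is plain C++ and does not use the visitor itself; it applies `L = -log(p) * reward` to the first few rows of the test data used in the example that follows, and the value for row 0 matches the assertion in that test.

```cpp
// Minimal sketch of the per-row policy learning loss, computed by hand.
// The data values are taken from the test below; this is not part of the
// DataFrame API itself.
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

int main() {
    const std::vector<double> action_prob = { 0.01, 0.5, 0.35 };
    const std::vector<double> reward      = { 1.0,  2.0, 3.0  };

    std::vector<double> loss;
    for (std::size_t i = 0; i < action_prob.size(); ++i)
        loss.push_back(-std::log(action_prob[i]) * reward[i]);  // L = -log(p) * r

    // -log(0.01) * 1 ~= 4.6052, the same value the visitor's test asserts
    assert(std::abs(loss[0] - 4.6052) < 0.0001);
    return 0;
}
```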
```cpp
static void test_PolicyLearningLossVisitor()  {

    std::cout << "\nTesting PolicyLearningLossVisitor{ } ..." << std::endl;

    // MyDataFrame and StlVecType are typedefs defined elsewhere in the
    // DataFrame test suite.
    MyDataFrame df;

    StlVecType<unsigned long>  idxvec =
        { 1, 2, 3, 10, 5, 7, 8, 12, 9, 12, 10, 13, 10, 15, 14 };
    StlVecType<double>         dblvec =
        { 0.01, 0.5, 0.35, 0.1, 0.11, 0.05, 0.06, 0.03,
          0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.08 };
    StlVecType<double>         dblvec2 =
        { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
    StlVecType<double>         dblvec3 =
        { 0, 1, -2, 3, 4, 5, 6, 7, -8, 9, 10, -11, 12, -13, 14 };

    df.load_data(std::move(idxvec),
                 std::make_pair("action_prob", dblvec),
                 std::make_pair("reward", dblvec2),
                 std::make_pair("dbl_col_3", dblvec3));

    // Visitor instantiated with 256-byte alignment (the A template parameter)
    plloss_v<double, unsigned long, 256>   pll;

    df.single_act_visit<double, double>("action_prob", "reward", pll);

    // Spot-check a few rows: loss = -log(action_prob) * reward
    assert(std::abs(pll.get_result()[0] - 4.6052) < 0.0001);    // -log(0.01) * 1
    assert(std::abs(pll.get_result()[6] - 19.6939) < 0.0001);   // -log(0.06) * 7
    assert(std::abs(pll.get_result()[14] - 37.8859) < 0.0001);  // -log(0.08) * 15
}
```