Signature:
#include <DataFrame/DataFrameMLVisitors.h>

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct PolicyLearningLossVisitor;

// -------------------------------------

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
using plloss_v = PolicyLearningLossVisitor<T, I, A>;
        
Description:

This is a “single action visitor”, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This functor calculates the loss function of policy learning. It requires two columns: action probability and reward.
The formula is: L = -log(P(action|state)) * reward, applied element-wise to each row (see the worked sketch after the parameter list).

Parameters:

T: Column data type.
I: Index type.
A: Memory alignment boundary for vectors. Default is system default alignment.
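
To make the arithmetic concrete, here is a minimal sketch that applies the formula directly, independent of the visitor. The vector values are taken from the first rows of the test program below; everything else (the variable names and the standalone main) is illustrative only.

#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

int main()  {

    // First three rows of the test data below
    const std::vector<double>  action_prob = { 0.01, 0.5, 0.35 };
    const std::vector<double>  reward = { 1, 2, 3 };
    std::vector<double>        loss (action_prob.size());

    // Element-wise: loss[i] = -log(P(action|state)) * reward, where the
    // action probability column holds P(action|state) for each row
    for (std::size_t i = 0; i < loss.size(); ++i)
        loss[i] = -std::log(action_prob[i]) * reward[i];

    // -log(0.01) * 1 ≈ 4.6052 -- the same value the visitor asserts below
    assert(std::abs(loss[0] - 4.6052) < 0.0001);
    return (0);
}

The test program below exercises the visitor itself through the DataFrame interface:
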
static void test_PolicyLearningLossVisitor()  {

    std::cout << "\nTesting PolicyLearningLossVisitor{  } ..." << std::endl;

    MyDataFrame                df;
    StlVecType<unsigned long>  idxvec = { 1, 2, 3, 10, 5, 7, 8, 12, 9, 12, 10, 13, 10, 15, 14 };
    StlVecType<double>         dblvec = { 0.01, 0.5, 0.35, 0.1, 0.11, 0.05, 0.06, 0.03, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.08};
    StlVecType<double>         dblvec2 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
    StlVecType<double>         dblvec3 = { 0, 1, -2, 3, 4, 5, 6, 7, -8, 9, 10, -11, 12, -13, 14};

    df.load_data(std::move(idxvec),
                 std::make_pair("action_prob", dblvec),
                 std::make_pair("reward", dblvec2),
                 std::make_pair("dbl_col_3", dblvec3));

    // Request 256-byte aligned vectors; the default A = 0 (system
    // default alignment) also works
    plloss_v<double, unsigned long, 256>    pll;

    df.single_act_visit<double, double>("action_prob", "reward", pll);

    // Each result element is -log(action_prob[i]) * reward[i]
    assert(std::abs(pll.get_result()[0] - 4.6052) < 0.0001);   // -log(0.01) * 1
    assert(std::abs(pll.get_result()[6] - 19.6939) < 0.0001);  // -log(0.06) * 7
    assert(std::abs(pll.get_result()[14] - 37.8859) < 0.0001); // -log(0.08) * 15
}
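
Since get_result() exposes the per-row losses as a vector, reducing them to a single scalar loss is straightforward. A hypothetical continuation of the test above (requires <numeric>):

    // Hypothetical: average the per-row losses into one scalar loss value
    const auto   &losses = pll.get_result();
    const double  mean_loss =
        std::accumulate(losses.begin(), losses.end(), 0.0) /
        double(losses.size());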