Signature Description

// Selects the strategy used to fill missing values in a DataFrame column.
//
enum class fill_policy : unsigned char
{
    value = 1,          // Replace every missing value with the supplied substitute
    fill_forward = 2,   // Replace a missing value with the value preceding it
    fill_backward = 3,  // Replace a missing value with the value following it

    // Linear interpolation, with the index column as the X coordinate and
    // the given column as the Y coordinate:
    //
    //               X - X1
    //     Y = Y1 + --------- * (Y2 - Y1)
    //               X2 - X1
    //
    linear_interpolate = 4,

    // NOTE(review): not described separately in the original; it was listed
    // directly under the interpolation formula, so it presumably applies the
    // same formula -- confirm against the implementation.
    //
    linear_extrapolate = 5,

    mid_point = 6,      // Replace a missing value with the mid-point of the surrounding values
};
This policy determines how to fill missing values in the DataFrame

Signature Description Parameters

// Fills the missing values of the named columns according to the policy.
//
// T:         Type of the column(s) named in col_names
// N:         Number of entries in both col_names and values
// col_names: Names of the columns to fill
// policy:    Fill method to apply (see fill_policy)
// values:    Substitutes used when policy is fill_policy::value;
//            values[i] belongs to col_names[i]
// limit:     How many values to fill; the default of -1 means fill all
//
template<typename T, size_t N>
void
fill_missing(const std::array<const char *, N> col_names,
             fill_policy policy,
             const std::array<T, N> values = { },
             int limit = -1);
        
It fills all the "missing values" with the given values and/or using the given method (see fill_policy above). A value is considered missing if it is NaN, for types that have NaN. For types without NaN (e.g. string), the default value is considered the missing value.
T: Type of the column(s) in the col_names array
N: Size of the col_names and values arrays
col_names: An array of names specifying the columns to fill.
policy: Specifies the method to use to fill the missing values. For example; forward fill, values, etc.
values: If the policy is "value", use these values to fill the missing holes. Each value corresponds to the same index in the col_names array.
limit: Specifies how many values to fill. Default is -1 meaning fill all missing values.
Signature Description Parameters

// Fills the missing values in self's columns from the columns of rhs that
// have the same name and type, at rows that share the same index value.
//
// DF:  Type of the rhs DataFrame
// Ts:  The types of all data columns, each listed once
// rhs: DataFrame consulted for the missing values
//
template <typename DF, typename... Ts>
void fill_missing(const DF& rhs);
        
It fills the missing values in all columns in self by investigating the rhs DataFrame. It attempts to find columns with the same name and type in rhs. If there are such columns in rhs, it fills the missing values in the corresponding columns in self that also have the same index value.

NOTE: This means that self and rhs must be aligned/ordered the same way for all common columns, including the index column. Otherwise, the result is either nonsense or not applied (if the index column is in a different order).
NOTE: Self and rhs must have the same index type. The == operator must be well defined on the index type.
NOTE: This method does not extend any of the columns in self. It just fills the holes, if data is present in rhs.
Ts: List all the types of all data columns. A type should be specified in the list only once.
rhs: DataFrame to be used to find the missing values in self
static void test_fill_missing_values()  {

    std::cout << "\nTesting fill_missing(values) ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4,
                               std::numeric_limits<double>::quiet_NaN(),
                               6, 7,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9,
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               20, 22, 23, 30, 31,
                               std::numeric_limits<double>::quiet_NaN(),
                               1.89 };
    std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(),
                               16,
                               std::numeric_limits<double>::quiet_NaN(),
                               18, 19, 16,
                               std::numeric_limits<double>::quiet_NaN(),
                               0.34, 1.56, 0.34, 2.3, 0.34,
                               std::numeric_limits<double>::quiet_NaN() };
    std::vector<int>    i1 = { 22,
                               std::numeric_limits<int>::quiet_NaN(),
                               std::numeric_limits<int>::quiet_NaN(),
                               25,
                               std::numeric_limits<int>::quiet_NaN() };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", i1));

    std::vector<std::string>    s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo" };

    df.load_column("col_str", std::move(s1), nan_policy::dont_pad_with_nans);

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);

    df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" }, fill_policy::value, { 1001, 1002, 1003 }, 3);
    df.fill_missing<std::string, 1>({ "col_str" }, fill_policy::value, { "XXXXXX" });

    std::cout << "After fill missing with values DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}

// -----------------------------------------------------------------------------

// Exercises fill_missing() with fill_policy::fill_forward.
//
// Loads three double columns containing NaN holes, a 5-entry int column and
// an 8-entry string column against a 14-entry index, prints the frame,
// forward-fills the double columns and the string column (passing limit = 3
// in both calls), then prints the frame again.
//
static void test_fill_missing_fill_forward()  {

    std::cout << "\nTesting fill_missing(fill_forward) ..." << std::endl;

    // Markers for the holes. numeric_limits<int> has no NaN representation,
    // so quiet_NaN() yields 0 for int.
    //
    const double    d_nan = std::numeric_limits<double>::quiet_NaN();
    const int       i_nan = std::numeric_limits<int>::quiet_NaN();

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456,
          123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4, d_nan, 6, 7,
                               d_nan, d_nan, d_nan,
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9, d_nan, 11, 12, d_nan, d_nan,
                               20, 22, 23, 30, 31, d_nan, 1.89 };
    std::vector<double> d3 = { d_nan, 16, d_nan, 18, 19, 16, d_nan,
                               0.34, 1.56, 0.34, 2.3, 0.34, d_nan };
    std::vector<int>    i1 = { 22, i_nan, i_nan, 25, i_nan };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", i1));

    // The string column is shorter than the index and is loaded without
    // padding.
    //
    std::vector<std::string>    s1 =
        { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo" };

    df.load_column("col_str", std::move(s1), nan_policy::dont_pad_with_nans);

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);

    // No substitute values are needed for this policy, hence the empty
    // values argument.
    //
    df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" },
                               fill_policy::fill_forward, { }, 3);
    df.fill_missing<std::string, 1>({ "col_str" },
                                    fill_policy::fill_forward, {  }, 3);

    // Label names the policy actually applied in this test.
    //
    std::cout << "After fill missing with fill_forward DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}

// -----------------------------------------------------------------------------

// Exercises fill_missing() with fill_policy::fill_backward.
//
// Loads three double columns containing NaN holes, a 12-entry string column
// and a 5-entry int column against a 14-entry index in a single load_data()
// call, prints the frame, backward-fills the double columns and the string
// column (default limit), then prints the frame again.
//
static void test_fill_missing_fill_backward()  {

    std::cout << "\nTesting fill_missing(fill_backward) ..." << std::endl;

    // Markers for the holes. numeric_limits<int> has no NaN representation,
    // so quiet_NaN() yields 0 for int.
    //
    const double    d_nan = std::numeric_limits<double>::quiet_NaN();
    const int       i_nan = std::numeric_limits<int>::quiet_NaN();

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456,
          123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4, d_nan, 6, 7,
                               d_nan, d_nan, d_nan,
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9, d_nan, 11, 12, d_nan, d_nan,
                               20, 22, 23, 30, 31, d_nan, 1.89 };
    std::vector<double> d3 = { d_nan, 16, d_nan, 18, 19, 16, d_nan,
                               0.34, 1.56, 0.34, 2.3, 0.34, d_nan };
    std::vector<int>    i1 = { 22, i_nan, i_nan, 25, i_nan };
    std::vector<std::string>    s1 =
        { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy",
          "iiii", "oooo", "pppp", "2222", "aaaa", "dddd" };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_str", s1),
                 std::make_pair("col_4", i1));

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);

    df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" },
                               fill_policy::fill_backward);
    df.fill_missing<std::string, 1>({ "col_str" },
                                    fill_policy::fill_backward);

    // Label names the policy actually applied in this test.
    //
    std::cout << "After fill missing with fill_backward DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}

// -----------------------------------------------------------------------------

// Exercises fill_missing() with fill_policy::linear_interpolate.
//
// Loads three double columns containing NaN holes and a 5-entry int column
// against a 14-entry index, prints the frame, fills the double columns by
// linear interpolation (default limit), then prints the frame again.
//
static void test_fill_missing_fill_linear_interpolation()  {

    std::cout << "\nTesting fill_missing(linear_interpolate) ..." << std::endl;

    // Markers for the holes. numeric_limits<int> has no NaN representation,
    // so quiet_NaN() yields 0 for int.
    //
    const double    d_nan = std::numeric_limits<double>::quiet_NaN();
    const int       i_nan = std::numeric_limits<int>::quiet_NaN();

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456,
          123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4, d_nan, 6, 7,
                               d_nan, d_nan, d_nan,
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9, d_nan, 11, 12, d_nan, d_nan,
                               20, 22, 23, 30, 31, d_nan, 1.89 };
    std::vector<double> d3 = { d_nan, 16, d_nan, 18, 19, 16, d_nan,
                               0.34, 1.56, 0.34, 2.3, 0.34, d_nan };
    std::vector<int>    i1 = { 22, i_nan, i_nan, 25, i_nan };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", i1));

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, int, double>(std::cout);

    df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" },
                               fill_policy::linear_interpolate);

    // Label names the policy actually applied in this test.
    //
    std::cout << "After fill missing with linear_interpolate DF:" << std::endl;
    df.write<std::ostream, int, double>(std::cout);
}

// -----------------------------------------------------------------------------

// Exercises fill_missing() with fill_policy::mid_point.
//
// Loads three double columns containing NaN holes, a 5-entry int column and
// an 8-entry string column against a 14-entry index, fills the double
// columns with the mid_point policy (default limit), then prints the frame.
// The string column is loaded and printed but not filled.
//
static void test_fill_missing_mid_point()  {

    std::cout << "\nTesting fill_missing(mid_point) ..." << std::endl;

    // Markers for the holes. numeric_limits<int> has no NaN representation,
    // so quiet_NaN() yields 0 for int.
    //
    const double    d_nan = std::numeric_limits<double>::quiet_NaN();
    const int       i_nan = std::numeric_limits<int>::quiet_NaN();

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456,
          123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4, d_nan, 6, 7,
                               d_nan, d_nan, d_nan,
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9, d_nan, 11, 12, d_nan, d_nan,
                               20, 22, 23, 30, 31, d_nan, 1.89 };
    std::vector<double> d3 = { d_nan, 16, d_nan, 18, 19, 16, d_nan,
                               0.34, 1.56, 0.34, 2.3, 0.34, d_nan };
    std::vector<int>    i1 = { 22, i_nan, i_nan, 25, i_nan };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", i1));

    // The string column is shorter than the index and is loaded without
    // padding.
    //
    std::vector<std::string>    s1 =
        { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo" };

    df.load_column("col_str", std::move(s1), nan_policy::dont_pad_with_nans);

    // The "before" dump is intentionally left disabled, as in the original.
    //
    // std::cout << "Original DF:" << std::endl;
    // df.write<std::ostream, int, double, std::string>(std::cout);

    df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" },
                               fill_policy::mid_point);

    // Label names the policy actually applied in this test.
    //
    std::cout << "After fill missing with mid_point DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}

// -----------------------------------------------------------------------------

static void test_fill_missing_df()  {

    std::cout << "\nTesting fill_missing(DataFrame) ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<unsigned long>  idx2 =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456,
          1234570, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4,
                               std::numeric_limits<double>::quiet_NaN(),
                               6, 7,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12, 13, 14 };
    std::vector<double> d12 = { 1, 2, 3, 4, 100, 6, 7, 101, 102,
                                std::numeric_limits<double>::quiet_NaN(),
                                11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9,
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               20, 22, 23, 30, 31,
                               std::numeric_limits<double>::quiet_NaN(),
                               1.89 };
    std::vector<double> d22 = { 8, 9, 200, 11, 12, 201, 202, 20, 22, 23, 30, 31,
                                std::numeric_limits<double>::quiet_NaN(),
                                1.89 };
    std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(),
                               16,
                               std::numeric_limits<double>::quiet_NaN(),
                               18, 19, 16,
                               std::numeric_limits<double>::quiet_NaN(),
                               0.34, 1.56, 0.34, 2.3, 0.34,
                               std::numeric_limits<double>::quiet_NaN() };
    std::vector<double> d32 = { 300, 16, 301, 18, 19, 16, 303, 0.34, 1.56, 0.34, 2.3, 0.34 };
    std::vector<int>    i1 = { 22,
                               std::numeric_limits<int>::quiet_NaN(),
                               std::numeric_limits<int>::quiet_NaN(),
                               25,
                               std::numeric_limits<int>::quiet_NaN() };
    std::vector<int>    xi1 = { 22,
                                std::numeric_limits<int>::quiet_NaN(),
                                std::numeric_limits<int>::quiet_NaN(),
                                25,
                                std::numeric_limits<int>::quiet_NaN() };
    std::vector<int>    i12 = { 22, 400, 401, 25,
                                std::numeric_limits<int>::quiet_NaN() };
    MyDataFrame         df;
    MyDataFrame         df2;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", i1),
                 std::make_pair("xcol_4", xi1));
    df2.load_data(std::move(idx2),
                  std::make_pair("col_1", d12),
                  std::make_pair("col_2", d22),
                  std::make_pair("col_3", d32),
                  std::make_pair("col_4", i12));

    std::vector<std::string>    s1 = { "qqqq", "wwww", "", "rrrr", "tttt", "", "iiii", "" };
    std::vector<std::string>    s12 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "aaaa", "bbbb", "cccc", "dddd", "gggg", "hhhh", "kkkk" };

    df.load_column("col_str", std::move(s1), nan_policy::dont_pad_with_nans);
    df2.load_column("col_str", std::move(s12), nan_policy::dont_pad_with_nans);

    df.fill_missing<MyDataFrame, double, int, std::string>(df2);

    std::cout << "After fill missing with values DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}