Signature Description Parameters

bool
read(const char *file_name,
     io_format iof = io_format::csv,
     bool columns_only = false);
        
It reads the contents of a text file/stream into this DataFrame. Currently three formats (csv, csv2, and json) are supported; see the io_format documentation page.
CSV file format must be:
  INDEX:<Number of data points>:<Comma delimited list of values>
  <Column1 name>:<Number of data points>:<Column1 type>:<Comma delimited list of values>
  <Column2 name>:<Number of data points>:<Column2 type>:<Comma delimited list of values>
      .
      .
      .
        
All empty lines and lines starting with # will be skipped. For examples, see the files in the test directory.

CSV2 file format must be (this is similar to Pandas csv format):
  INDEX:<Number of data points>:<Index type>:,<Column1 name>:<Number of data points>:<Column1 type>,<Column2 name>:<Number of data points>:<Column2 type>, . . .
  Comma delimited rows of values
      .
      .
      .
        
All empty lines and lines starting with # will be skipped. For examples, see the IBM and FORD files in the test directory.

JSON file format looks like this:
  {
    "INDEX":{"N":3,"T":"ulong","D":[123450,123451,123452]},
    "col_3":{"N":3,"T":"double","D":[15.2,16.34,17.764]},
    "col_4":{"N":3,"T":"int","D":[22,23,24]},
    "col_str":{"N":3,"T":"string","D":["11","22","33"]},
    "col_2":{"N":3,"T":"double","D":[8,9.001,10]},
    "col_1":{"N":3,"T":"double","D":[1,2,3.456]}
  }
        
Please note that DataFrame json does not follow the json spec 100%. In json, dictionary fields have no particular order. But in DataFrame json:
  1. Column "INDEX" must be the first column, if it exists
  2. Fields in column dictionaries must be in N (number of data points), T (type), D (data) order


In all formats the following data types are supported:
          float
          double
          longdouble  -- long double
          int
          uint        -- unsigned int
          long
          longlong    -- long long int
          ulong       -- unsigned long
          ulonglong   -- unsigned long long int
          string
          bool
          DateTime    -- DateTime data in format of <Epoch seconds>.<nanoseconds> (1516179600.874123908)
        
In case of io_format::csv2 the following additional types are also supported:
          DateTimeAME -- DateTime string printed in American style (MM/DD/YYYY HH:MM:SS.mmm)
          DateTimeEUR -- DateTime string printed in European style (YYYY/MM/DD HH:MM:SS.mmm)
          DateTimeISO -- DateTime string printed in ISO style (YYYY-MM-DD HH:MM:SS.mmm)
        
file_name: Complete path to the file
iof: Specifies the I/O format. The default is CSV
columns_only: If true, the index column is not read.
              You may want to do that to read multiple files into the same DataFrame.
              If columns_only is false the index column must exist in the stream.
              If columns_only is true the index column may or may not exist
        

template<typename S>
bool
read(S &in_s,
     io_format iof = io_format::csv,
     bool columns_only = false);
        
Same as read() above, but takes a reference to a stream

std::future<bool>
read_async(const char *file_name,
           io_format iof = io_format::csv,
           bool columns_only = false);
        
Same as read() above, but executed asynchronously

template<typename S>
std::future<bool>
read_async(S &in_s,
           io_format iof = io_format::csv,
           bool columns_only = false);
        
Same as read_async() above, but takes a reference to a stream
static void test_read()  {

    std::cout << "\nTesting read() ..." << std::endl;

    MyDataFrame df_read;

    try  {
        std::future<bool>   fut2 = df_read.read_async("sample_data.csv");

        fut2.get();
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
    }
    df_read.write<std::ostream, int, unsigned long, double, std::string, bool>(std::cout);

    StdDataFrame<std::string>   df_read_str;

    try  {
        df_read_str.read("sample_data_string_index.csv");
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
    }
    df_read_str.write<std::ostream, int, unsigned long, double, std::string, bool>(std::cout);

    StdDataFrame<DateTime>  df_read_dt;

    try  {
        df_read_dt.read("sample_data_dt_index.csv");
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
    }
    df_read_dt.write<std::ostream, int, unsigned long, double, std::string, bool>(std::cout);
}

// -----------------------------------------------------------------------------

static void test_io_format_csv2()  {

    std::cout << "\nTesting io_format_csv2( ) ..." << std::endl;

    std::vector<unsigned long>  ulgvec2 =
        { 123450, 123451, 123452, 123450, 123455, 123450, 123449, 123450, 123451, 123450, 123452, 123450, 123455, 123450,
          123454, 123450, 123450, 123457, 123458, 123459, 123450, 123441, 123442, 123432, 123450, 123450, 123435, 123450 };
    std::vector<unsigned long>  xulgvec2 = ulgvec2;
    std::vector<int>            intvec2 =
        { 1, 2, 3, 4, 5, 3, 7, 3, 9, 10, 3, 2, 3, 14, 2, 2, 2, 3, 2, 3, 3, 3, 3, 3, 36, 2, 45, 2 };
    std::vector<double>         xdblvec2 =
        { 1.2345, 2.2345, 3.2345, 4.2345, 5.2345, 3.0, 0.9999, 10.0, 4.25, 0.009, 8.0, 2.2222, 3.3333,
          11.0, 5.25, 1.009, 2.111, 9.0, 3.2222, 4.3333, 12.0, 6.25, 2.009, 3.111, 10.0, 4.2222, 5.3333 };
    std::vector<double>         dblvec22 =
        { 0.998, 0.3456, 0.056, 0.15678, 0.00345, 0.923, 0.06743, 0.1, 0.0056, 0.07865, 0.0111, 0.1002, -0.8888,
          0.14, 0.0456, 0.078654, -0.8999, 0.8002, -0.9888, 0.2, 0.1056, 0.87865, -0.6999, 0.4111, 0.1902, -0.4888 };
    std::vector<std::string>    strvec2 =
        { "4% of something", "Description 4/5", "This is bad", "3.4% of GDP", "Market drops", "Market pulls back",
          "$15 increase", "Running fast", "C++14 development", "Some explanation", "More strings", "Bonds vs. Equities",
          "Almost done", "XXXX04", "XXXX2", "XXXX3", "XXXX4", "XXXX4", "XXXX5", "XXXX6",
          "XXXX7", "XXXX10", "XXXX11", "XXXX02", "XXXX03" };
    std::vector<bool>           boolvec = { true, true, true, false, false, true };

    MyDataFrame df;

    df.load_data(std::move(ulgvec2), std::make_pair("ul_col", xulgvec2));
    df.load_column("xint_col", std::move(intvec2), nan_policy::dont_pad_with_nans);
    df.load_column("str_col", std::move(strvec2), nan_policy::dont_pad_with_nans);
    df.load_column("dbl_col", std::move(xdblvec2), nan_policy::dont_pad_with_nans);
    df.load_column("dbl_col_2", std::move(dblvec22), nan_policy::dont_pad_with_nans);
    df.load_column("bool_col", std::move(boolvec), nan_policy::dont_pad_with_nans);

    df.write<std::ostream, int, unsigned long, double, bool, std::string>(std::cout, false, io_format::csv2);

    MyDataFrame df_read;

    try  {
        df_read.read("csv2_format_data.csv", io_format::csv2);
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
    }
    df_read.write<std::ostream, int, unsigned long, double, bool, std::string>(std::cout, false, io_format::csv2);
}

// -----------------------------------------------------------------------------

static void test_DT_IBM_data()  {

    std::cout << "\nTesting DT_IBM_data(  ) ..." << std::endl;

    typedef StdDataFrame<DateTime>  DT_DataFrame;

    DT_DataFrame    df;

    df.read("DT_IBM.csv", io_format::csv2);

    assert(df.get_column<double>("IBM_Open")[0] == 98.4375);
    assert(df.get_column<double>("IBM_Close")[18] == 97.875);
    assert(df.get_index()[18] == DateTime(20001128));
    assert(fabs(df.get_column<double>("IBM_High")[5030] - 111.8) < 0.001);
    assert(df.get_column<long>("IBM_Volume")[5022] == 21501100L);
    assert(df.get_index()[5020] == DateTime(20201016));
}