Signature | Description |
---|---|
// Patterns that pattern_match() can test a column against.
// Each enumerator is on its own line so that its trailing comment cannot
// swallow the enumerators that follow it. The explicit values are unchanged;
// note that the value 5 is not assigned.
enum class pattern_spec : unsigned char  {
    monotonic_increasing = 1,           // i >= j
    strictly_monotonic_increasing = 2,  // i > j
    monotonic_decreasing = 3,           // i <= j
    strictly_monotonic_decreasing = 4,  // i < j
    normally_distributed = 6,           // 68%-95%-99.7% 3-sigma rule to approximate
    standard_normally_distributed = 7,  // Same as normal + 0/1.0 check
    lognormally_distributed = 8,        // Uses the above normal test on the log(x) + Skew and Kurtosis
};
Enumerated type to specify different patterns to detect. |
Signature | Description | Parameters |
---|---|---|
template<typename T> bool pattern_match(const char *col_name, pattern_spec pattern, double epsilon = 0.0) const; |
This method returns true if the given column follows the given pattern, otherwise it returns false. Epsilon is used for approximation. The supported patterns are listed in pattern_spec above. |
T: Type of the named column col_name: Name of the column pattern: The pattern we are looking for specified in DataFrameTypes.h epsilon: An epsilon value to use to match pattern parameters |
static void test_pattern_match() { std::cout << "\nTesting pattern_match( ) ..." << std::endl; const size_t item_cnt = 8192; MyDataFrame df; RandGenParams<double> p; p.mean = 5.6; p.std = 0.5; p.seed = 123; p.min_value = 0; p.max_value = 30; df.load_data(MyDataFrame::gen_sequence_index(0, item_cnt, 1), std::make_pair("lognormal", gen_lognormal_dist<double>(item_cnt, p)), std::make_pair("normal", gen_normal_dist<double>(item_cnt, p)), std::make_pair("uniform_real", gen_uniform_real_dist<double>(item_cnt, p))); p.mean = 0; p.std = 1.0; p.min_value = -30; p.max_value = 30; df.load_column("std_normal", gen_normal_dist<double>(item_cnt, p)); df.load_column<unsigned long>("increasing", MyDataFrame::gen_sequence_index(0, item_cnt, 1)); bool result = df.pattern_match<double>("lognormal", pattern_spec::normally_distributed, 0.01); assert(result == false); result = df.pattern_match<double>("normal", pattern_spec::normally_distributed, 0.01); assert(result == true); result = df.pattern_match<double>("std_normal", pattern_spec::standard_normally_distributed, 0.013); assert(result == true); result = df.pattern_match<double>("lognormal", pattern_spec::lognormally_distributed, 0.01); assert(result == true); result = df.pattern_match<double>("normal", pattern_spec::lognormally_distributed, 0.01); assert(result == false); result = df.pattern_match<double>("uniform_real", pattern_spec::lognormally_distributed, 1.0); assert(result == false); result = df.pattern_match<double>("uniform_real", pattern_spec::normally_distributed, 0.1); assert(result == false); result = df.pattern_match<unsigned long>("increasing", pattern_spec::monotonic_increasing); assert(result == true); result = df.pattern_match<unsigned long>("increasing", pattern_spec::strictly_monotonic_increasing); assert(result == true); df.get_column<unsigned long>("increasing")[10] = 9; result = df.pattern_match<unsigned long>("increasing", pattern_spec::monotonic_increasing); assert(result == true); result = 
df.pattern_match<unsigned long>("increasing", pattern_spec::strictly_monotonic_increasing); assert(result == false); df.get_column<unsigned long>("increasing")[1000] = 988; result = df.pattern_match<unsigned long>("increasing", pattern_spec::monotonic_increasing); assert(result == false); result = df.pattern_match<unsigned long>("increasing", pattern_spec::strictly_monotonic_increasing); assert(result == false); }