Browse Source

Frequency validation and automatic assignment during creation

modified tests to account for the changes.
find_closest_changes
Gourav Kumar 2 years ago
parent
commit
c605f71f10
  1. 90
      fincal/core.py
  2. 20
      fincal/fincal.py
  3. 4
      tests/test_core.py

90
fincal/core.py

@ -6,7 +6,17 @@ import warnings
from collections import UserList
from dataclasses import dataclass
from numbers import Number
from typing import Any, Callable, Iterable, List, Literal, Mapping, Sequence, Type
from typing import (
Any,
Callable,
Iterable,
List,
Literal,
Mapping,
Sequence,
Tuple,
Type,
)
from dateutil.relativedelta import relativedelta
@ -289,6 +299,51 @@ class Series(UserList):
return self.__class__([i + other for i in self], self.dtype.__name__)
def _validate_frequency(
data: List[Tuple[datetime.datetime, float]], provided_frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None
):
"""Checks the data and returns the expected frequency."""
if provided_frequency is not None:
provided_frequency = getattr(AllFrequencies, provided_frequency)
start_date = data[0][0]
end_date = data[-1][0]
overall_gap = (end_date - start_date).days
num_data_points = len(data)
# days_per_data = num_data_points / overall_gap
expected_data_points = {
"D": (round(overall_gap * 0.6, 0), round(overall_gap * 1.05 + 1, 0)),
"W": (round(overall_gap / 7 * 0.7, 0), round(overall_gap / 7 * 1.05 + 1, 0)),
"M": (round(overall_gap / 30 * 0.8, 0), round(overall_gap / 30 * 1.05 + 1, 0)),
"Q": (round(overall_gap / 92 * 0.85, 0), round(overall_gap / 92 * 1.05 + 1, 0)),
"H": (round(overall_gap / 182 * 0.85, 0), round(overall_gap / 182 * 1.05 + 1, 0)),
"A": (round(overall_gap / 365 * 0.85, 0), round(overall_gap / 365 * 1.05 + 1, 0)),
}
for frequency, (min, max) in expected_data_points.items():
if min <= num_data_points <= max:
expected_frequency = frequency
break
else:
raise ValueError("Data does not match any known frequency. Perhaps you have too many missing data points.")
expected_data_points = expected_data_points[expected_frequency]
if provided_frequency is None:
frequency_match = None
elif provided_frequency.symbol == expected_frequency:
frequency_match = True
else:
frequency_match = False
return {
"gap": overall_gap,
"expected_data_points": expected_data_points,
"actual_data_points": num_data_points,
"expected_frequency": expected_frequency,
"frequency_match": frequency_match,
}
@Mapping.register
class TimeSeriesCore:
"""Defines the core building blocks of a TimeSeries object"""
@ -296,7 +351,8 @@ class TimeSeriesCore:
def __init__(
self,
ts_data: List[Iterable] | Mapping,
frequency: Literal["D", "W", "M", "Q", "H", "Y"],
frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
validate_frequency: bool = True,
date_format: str = "%Y-%m-%d",
):
"""Instantiate a TimeSeriesCore object
@ -308,9 +364,21 @@ class TimeSeriesCore:
The first element of each tuple should be a date and second element should be a value.
In case of dictionary, the key should be the date.
frequency : str
frequency : str, optional
The frequency of the time series.
Valid values are {D, W, M, Q, H, Y}
If no frequency is provided, it will be inferred from the data.
Frequency assignment uses approximation and hence the assignment may be incorrect if
there are fewer than 12 data points.
validate_frequency: boolean, default True
Whether the provided frequency should be validated against the data.
When set to True, if the expected number of data points are not withint the expected limits,
it will raise an Exception and object creation will fail.
Validation is performed only if data contains at least 12 data points, as a fewer number of
data points are not sufficient to determine the frequency correctly.
This parameter will be ignored if frequency is not provided.
refer core._validate_frequency for more details.
date_format : str, optional, default "%Y-%m-%d"
Specify the format of the date
@ -319,10 +387,24 @@ class TimeSeriesCore:
ts_data = _preprocess_timeseries(ts_data, date_format=date_format)
validation = _validate_frequency(data=ts_data, provided_frequency=frequency)
if frequency is None:
frequency = validation["expected_frequency"]
self.frequency = getattr(AllFrequencies, frequency)
if validate_frequency and len(ts_data) >= 12:
if validation["frequency_match"] is not None and not validation["frequency_match"]:
raise ValueError(
f"Data appears to be of frquency {validation['expected_frequency']!r}, "
f"but {frequency!r} was provided. Pass the correct frequency."
"\nPass validate_frequency=False to disable this validation."
)
self.data = dict(ts_data)
if len(self.data) != len(ts_data):
warnings.warn("The input data contains duplicate dates which have been ignored.")
self.frequency: Frequency = getattr(AllFrequencies, frequency)
# self.frequency: Frequency = getattr(AllFrequencies, frequency)
self.iter_num: int = -1
self._dates: list = None
self._values: list = None

20
fincal/fincal.py

@ -118,26 +118,34 @@ class TimeSeries(TimeSeriesCore):
* List of dictionaries with 2 keys, first representing date & second representing value
* Dictionary of key: value pairs
date_format : str, optional, default "%Y-%m-%d"
Specify the format of the date
Required only if the first argument of tuples is a string. Otherwise ignored.
frequency : str, optional, default "infer"
The frequency of the time series. Default is infer.
The class will try to infer the frequency automatically and adjust to the closest member.
Note that inferring frequencies can fail if the data is too irregular.
Valid values are {D, W, M, Q, H, Y}
validate_frequency: boolean, default True
Whether the provided frequency should be validated against the data.
When set to True, if the expected number of data points are not withint the expected limits,
it will raise an Exception and object creation will fail.
This parameter will be ignored if frequency is not provided.
refer core._validate_frequency for more details.
date_format : str, optional, default "%Y-%m-%d"
Specify the format of the date
Required only if the first argument of tuples is a string. Otherwise ignored.
"""
def __init__(
self,
data: List[Iterable] | Mapping,
frequency: Literal["D", "W", "M", "Q", "H", "Y"],
frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
validate_frequency: bool = False,
date_format: str = "%Y-%m-%d",
):
"""Instantiate a TimeSeriesCore object"""
super().__init__(data, frequency, date_format)
super().__init__(data, frequency, validate_frequency, date_format)
def info(self) -> str:
"""Summary info about the TimeSeries object"""

4
tests/test_core.py

@ -183,7 +183,7 @@ class TestSetitem:
data = [("2021-01-01", 220), ("2021-01-04", 230), ("2021-03-07", 240)]
def test_setitem(self):
ts = TimeSeriesCore(self.data, frequency="D")
ts = TimeSeriesCore(self.data, frequency="M")
assert len(ts) == 3
ts["2021-01-02"] = 225
@ -195,7 +195,7 @@ class TestSetitem:
assert ts["2021-01-02"][1] == 227.6
def test_errors(self):
ts = TimeSeriesCore(self.data, frequency="D")
ts = TimeSeriesCore(self.data, frequency="M")
with pytest.raises(TypeError):
ts["2021-01-03"] = "abc"

Loading…
Cancel
Save