made eomonth parsing more intelligent

Corrected tests and code to account for the same
This commit is contained in:
Gourav Kumar 2022-05-12 10:40:47 +05:30
parent 7e524ccf7a
commit 5512a647ad
4 changed files with 60 additions and 10 deletions

View File

@ -14,6 +14,7 @@ from .utils import (
FincalOptions, FincalOptions,
_find_closest_date, _find_closest_date,
_interval_to_years, _interval_to_years,
_is_eomonth,
_preprocess_match_options, _preprocess_match_options,
) )
@ -146,7 +147,7 @@ class TimeSeries(TimeSeriesCore):
return res_string.format(self.start_date, self.end_date, total_dates) return res_string.format(self.start_date, self.end_date, total_dates)
def ffill( def ffill(
self, inplace: bool = False, limit: int = 1000, skip_weekends: bool = False, eomonth: bool = False self, inplace: bool = False, limit: int = 1000, skip_weekends: bool = False, eomonth: bool = None
) -> TimeSeries | None: ) -> TimeSeries | None:
"""Forward fill missing dates in the time series """Forward fill missing dates in the time series
@ -165,6 +166,8 @@ class TimeSeries(TimeSeriesCore):
------- -------
Returns a TimeSeries object if inplace is False, otherwise None Returns a TimeSeries object if inplace is False, otherwise None
""" """
if eomonth is None:
eomonth = _is_eomonth(self.dates)
dates_to_fill = create_date_series( dates_to_fill = create_date_series(
self.start_date, self.end_date, self.frequency.symbol, eomonth, skip_weekends=skip_weekends self.start_date, self.end_date, self.frequency.symbol, eomonth, skip_weekends=skip_weekends
@ -190,7 +193,7 @@ class TimeSeries(TimeSeriesCore):
return self.__class__(new_ts, frequency=self.frequency.symbol) return self.__class__(new_ts, frequency=self.frequency.symbol)
def bfill( def bfill(
self, inplace: bool = False, limit: int = 1000, skip_weekends: bool = False, eomonth: bool = False self, inplace: bool = False, limit: int = 1000, skip_weekends: bool = False, eomonth: bool = None
) -> TimeSeries | None: ) -> TimeSeries | None:
"""Backward fill missing dates in the time series """Backward fill missing dates in the time series
@ -209,6 +212,8 @@ class TimeSeries(TimeSeriesCore):
------- -------
Returns a TimeSeries object if inplace is False, otherwise None Returns a TimeSeries object if inplace is False, otherwise None
""" """
if eomonth is None:
eomonth = _is_eomonth(self.dates)
dates_to_fill = create_date_series( dates_to_fill = create_date_series(
self.start_date, self.end_date, self.frequency.symbol, eomonth, skip_weekends=skip_weekends self.start_date, self.end_date, self.frequency.symbol, eomonth, skip_weekends=skip_weekends

View File

@ -2,6 +2,8 @@ import datetime
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Literal, Mapping, Sequence, Tuple from typing import List, Literal, Mapping, Sequence, Tuple
from dateutil.relativedelta import relativedelta
from .exceptions import DateNotFoundError, DateOutOfRangeError from .exceptions import DateNotFoundError, DateOutOfRangeError
@ -174,3 +176,14 @@ def _interval_to_years(interval_type: Literal["years", "months", "day"], interva
year_conversion_factor: dict = {"years": 1, "months": 12, "days": 365} year_conversion_factor: dict = {"years": 1, "months": 12, "days": 365}
years: float = interval_value / year_conversion_factor[interval_type] years: float = interval_value / year_conversion_factor[interval_type]
return years return years
def _is_eomonth(dates: Sequence[datetime.datetime], threshold: float = 0.7) -> bool:
    """Check whether a date series should be treated as an end-of-month series.

    A date counts as an end-of-month date when the following calendar day
    falls in a different month. The series is treated as an end-of-month
    series when the proportion of such dates strictly exceeds ``threshold``.

    This can be called for any frequency, but the result is only meaningful
    for monthly and lower frequencies.

    Parameters
    ----------
    dates:
        The dates of the series.
    threshold:
        Proportion of end-of-month dates that must be exceeded.

    Returns
    -------
    True if the proportion of end-of-month dates is greater than threshold.
    False otherwise, including when dates is empty.
    """
    total = len(dates)
    if total == 0:
        # Nothing to inspect; avoid dividing by zero below.
        return False

    one_day = datetime.timedelta(days=1)
    # A date is the last day of its month if adding one day changes the month.
    eomonth_count = sum((date + one_day).month != date.month for date in dates)
    return eomonth_count / total > threshold

View File

@ -56,6 +56,7 @@ def create_prices(s0: float, mu: float, sigma: float, num_prices: int) -> list:
def sample_data_generator( def sample_data_generator(
frequency: fc.Frequency, frequency: fc.Frequency,
start_date: datetime.date = datetime.date(2017, 1, 1),
num: int = 1000, num: int = 1000,
skip_weekends: bool = False, skip_weekends: bool = False,
mu: float = 0.1, mu: float = 0.1,
@ -87,7 +88,6 @@ def sample_data_generator(
Returns a TimeSeries object Returns a TimeSeries object
""" """
start_date = datetime.datetime(2017, 1, 1)
timedelta_dict = { timedelta_dict = {
frequency.freq_type: int( frequency.freq_type: int(
frequency.value * num * (7 / 5 if frequency == fc.AllFrequencies.D and skip_weekends else 1) frequency.value * num * (7 / 5 if frequency == fc.AllFrequencies.D and skip_weekends else 1)

View File

@ -198,6 +198,23 @@ class TestTimeSeriesBasics:
assert "2017-08-31" in bf assert "2017-08-31" in bf
assert bf["2017-08-31"][1] == bf["2017-09-30"][1] assert bf["2017-08-31"][1] == bf["2017-09-30"][1]
def test_fill_quarterly(self, create_test_data):
    """Check that ffill and bfill restore entries removed from a quarterly series."""
    quarterly_data = create_test_data(frequency=AllFrequencies.Q, num=10, eomonth=True)
    # Remove two of the ten generated entries so the fills have gaps to restore.
    # The second pop indexes into the already-shortened data.
    quarterly_data.pop(2)
    quarterly_data.pop(6)
    series = TimeSeries(quarterly_data, frequency="Q")
    assert len(series) == 8

    # Forward fill: the restored date carries the value of the preceding quarter.
    forward_filled = series.ffill()
    assert len(forward_filled) == 10
    assert "2017-07-31" in forward_filled
    assert forward_filled["2017-07-31"][1] == forward_filled["2017-04-30"][1]

    # Backward fill: the restored date carries the value of the following quarter.
    backward_filled = series.bfill()
    assert len(backward_filled) == 10
    assert "2018-10-31" in backward_filled
    assert backward_filled["2018-10-31"][1] == backward_filled["2019-01-31"][1]
class TestReturns: class TestReturns:
def test_returns_calc(self, create_test_data): def test_returns_calc(self, create_test_data):
@ -268,13 +285,13 @@ class TestReturns:
ts.calculate_returns("2020-11-25", return_period_unit="days", return_period_value=90, closest_max_days=10) ts.calculate_returns("2020-11-25", return_period_unit="days", return_period_value=90, closest_max_days=10)
def test_rolling_returns(self): def test_rolling_returns(self):
# Yet to be written # To-do
return True return True
class TestExpand: class TestExpand:
def test_weekly_to_daily(self, create_test_data): def test_weekly_to_daily(self, create_test_data):
ts_data = create_test_data(AllFrequencies.W, 10) ts_data = create_test_data(AllFrequencies.W, num=10)
ts = TimeSeries(ts_data, "W") ts = TimeSeries(ts_data, "W")
expanded_ts = ts.expand("D", "ffill") expanded_ts = ts.expand("D", "ffill")
assert len(expanded_ts) == 64 assert len(expanded_ts) == 64
@ -282,7 +299,7 @@ class TestExpand:
assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1] assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1]
def test_weekly_to_daily_no_weekends(self, create_test_data): def test_weekly_to_daily_no_weekends(self, create_test_data):
ts_data = create_test_data(AllFrequencies.W, 10) ts_data = create_test_data(AllFrequencies.W, num=10)
ts = TimeSeries(ts_data, "W") ts = TimeSeries(ts_data, "W")
expanded_ts = ts.expand("D", "ffill", skip_weekends=True) expanded_ts = ts.expand("D", "ffill", skip_weekends=True)
assert len(expanded_ts) == 46 assert len(expanded_ts) == 46
@ -290,7 +307,7 @@ class TestExpand:
assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1] assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1]
def test_monthly_to_daily(self, create_test_data): def test_monthly_to_daily(self, create_test_data):
ts_data = create_test_data(AllFrequencies.M, 6) ts_data = create_test_data(AllFrequencies.M, num=6)
ts = TimeSeries(ts_data, "M") ts = TimeSeries(ts_data, "M")
expanded_ts = ts.expand("D", "ffill") expanded_ts = ts.expand("D", "ffill")
assert len(expanded_ts) == 152 assert len(expanded_ts) == 152
@ -298,7 +315,7 @@ class TestExpand:
assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1] assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1]
def test_monthly_to_daily_no_weekends(self, create_test_data): def test_monthly_to_daily_no_weekends(self, create_test_data):
ts_data = create_test_data(AllFrequencies.M, 6) ts_data = create_test_data(AllFrequencies.M, num=6)
ts = TimeSeries(ts_data, "M") ts = TimeSeries(ts_data, "M")
expanded_ts = ts.expand("D", "ffill", skip_weekends=True) expanded_ts = ts.expand("D", "ffill", skip_weekends=True)
assert len(expanded_ts) == 109 assert len(expanded_ts) == 109
@ -306,7 +323,7 @@ class TestExpand:
assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1] assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1]
def test_monthly_to_weekly(self, create_test_data): def test_monthly_to_weekly(self, create_test_data):
ts_data = create_test_data(AllFrequencies.M, 6) ts_data = create_test_data(AllFrequencies.M, num=6)
ts = TimeSeries(ts_data, "M") ts = TimeSeries(ts_data, "M")
expanded_ts = ts.expand("W", "ffill") expanded_ts = ts.expand("W", "ffill")
assert len(expanded_ts) == 22 assert len(expanded_ts) == 22
@ -314,7 +331,7 @@ class TestExpand:
assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1] assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1]
def test_yearly_to_monthly(self, create_test_data): def test_yearly_to_monthly(self, create_test_data):
ts_data = create_test_data(AllFrequencies.Y, 5) ts_data = create_test_data(AllFrequencies.Y, num=5)
ts = TimeSeries(ts_data, "Y") ts = TimeSeries(ts_data, "Y")
expanded_ts = ts.expand("M", "ffill") expanded_ts = ts.expand("M", "ffill")
assert len(expanded_ts) == 49 assert len(expanded_ts) == 49
@ -322,6 +339,21 @@ class TestExpand:
assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1] assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1]
class TestShrink:
# To-do
pass
class TestMeanReturns:
# To-do
pass
class TestReadCsv:
# To-do
pass
class TestReturnsAgain: class TestReturnsAgain:
data = [ data = [
("2020-01-01", 10), ("2020-01-01", 10),