Made eomonth handling smarter: ffill/bfill now infer it from the dates when it is not specified

Updated the tests and code to account for this change.
This commit is contained in:
Gourav Kumar 2022-05-12 10:40:47 +05:30
parent 7e524ccf7a
commit 5512a647ad
4 changed files with 60 additions and 10 deletions

View File

@ -14,6 +14,7 @@ from .utils import (
FincalOptions,
_find_closest_date,
_interval_to_years,
_is_eomonth,
_preprocess_match_options,
)
@ -146,7 +147,7 @@ class TimeSeries(TimeSeriesCore):
return res_string.format(self.start_date, self.end_date, total_dates)
def ffill(
self, inplace: bool = False, limit: int = 1000, skip_weekends: bool = False, eomonth: bool = False
self, inplace: bool = False, limit: int = 1000, skip_weekends: bool = False, eomonth: bool = None
) -> TimeSeries | None:
"""Forward fill missing dates in the time series
@ -165,6 +166,8 @@ class TimeSeries(TimeSeriesCore):
-------
Returns a TimeSeries object if inplace is False, otherwise None
"""
if eomonth is None:
eomonth = _is_eomonth(self.dates)
dates_to_fill = create_date_series(
self.start_date, self.end_date, self.frequency.symbol, eomonth, skip_weekends=skip_weekends
@ -190,7 +193,7 @@ class TimeSeries(TimeSeriesCore):
return self.__class__(new_ts, frequency=self.frequency.symbol)
def bfill(
self, inplace: bool = False, limit: int = 1000, skip_weekends: bool = False, eomonth: bool = False
self, inplace: bool = False, limit: int = 1000, skip_weekends: bool = False, eomonth: bool = None
) -> TimeSeries | None:
"""Backward fill missing dates in the time series
@ -209,6 +212,8 @@ class TimeSeries(TimeSeriesCore):
-------
Returns a TimeSeries object if inplace is False, otherwise None
"""
if eomonth is None:
eomonth = _is_eomonth(self.dates)
dates_to_fill = create_date_series(
self.start_date, self.end_date, self.frequency.symbol, eomonth, skip_weekends=skip_weekends

View File

@ -2,6 +2,8 @@ import datetime
from dataclasses import dataclass
from typing import List, Literal, Mapping, Sequence, Tuple
from dateutil.relativedelta import relativedelta
from .exceptions import DateNotFoundError, DateOutOfRangeError
@ -174,3 +176,14 @@ def _interval_to_years(interval_type: Literal["years", "months", "day"], interva
year_conversion_factor: dict = {"years": 1, "months": 12, "days": 365}
years: float = interval_value / year_conversion_factor[interval_type]
return years
def _is_eomonth(dates: Sequence[datetime.datetime], threshold: float = 0.7) -> bool:
    """Check whether a date series should be treated as an end-of-month series.

    A date counts as an end-of-month date when the next calendar day falls in
    a different month. The series is treated as end-of-month when the
    proportion of such dates is strictly greater than ``threshold``.

    This can be called for any frequency, but the result is only meaningful
    for monthly and lower frequencies.

    Parameters
    ----------
    dates:
        The dates to inspect.
    threshold:
        Proportion (between 0 and 1) of end-of-month dates that must be
        exceeded for the series to qualify.

    Returns
    -------
    True if the share of end-of-month dates exceeds ``threshold``, otherwise
    False. An empty series is never treated as end-of-month.
    """
    total = len(dates)
    # Guard against division by zero; an empty series carries no evidence
    # of being end-of-month.
    if total == 0:
        return False

    one_day = datetime.timedelta(days=1)
    # Stepping one day forward changes the month only on the last day of a month.
    eomonth_count = sum(date.month != (date + one_day).month for date in dates)
    return eomonth_count / total > threshold

View File

@ -56,6 +56,7 @@ def create_prices(s0: float, mu: float, sigma: float, num_prices: int) -> list:
def sample_data_generator(
frequency: fc.Frequency,
start_date: datetime.date = datetime.date(2017, 1, 1),
num: int = 1000,
skip_weekends: bool = False,
mu: float = 0.1,
@ -87,7 +88,6 @@ def sample_data_generator(
Returns a TimeSeries object
"""
start_date = datetime.datetime(2017, 1, 1)
timedelta_dict = {
frequency.freq_type: int(
frequency.value * num * (7 / 5 if frequency == fc.AllFrequencies.D and skip_weekends else 1)

View File

@ -198,6 +198,23 @@ class TestTimeSeriesBasics:
assert "2017-08-31" in bf
assert bf["2017-08-31"][1] == bf["2017-09-30"][1]
def test_fill_quarterly(self, create_test_data):
# Checks that ffill/bfill restore missing points in a quarterly series
# generated with eomonth=True, without eomonth being passed to the fill calls.
ts_data = create_test_data(frequency=AllFrequencies.Q, num=10, eomonth=True)
# Drop two points to create gaps. The second pop runs on the already
# shortened data, so (assuming list semantics) it removes what was
# originally the item at index 7.
ts_data.pop(2)
ts_data.pop(6)
ts = TimeSeries(ts_data, frequency="Q")
assert len(ts) == 8
# Forward fill: the gap at 2017-07-31 must reappear and carry the value
# of the preceding quarter (2017-04-30).
ff = ts.ffill()
assert len(ff) == 10
assert "2017-07-31" in ff
assert ff["2017-07-31"][1] == ff["2017-04-30"][1]
# Backward fill: the gap at 2018-10-31 must reappear and carry the value
# of the following quarter (2019-01-31).
bf = ts.bfill()
assert len(bf) == 10
assert "2018-10-31" in bf
assert bf["2018-10-31"][1] == bf["2019-01-31"][1]
class TestReturns:
def test_returns_calc(self, create_test_data):
@ -268,13 +285,13 @@ class TestReturns:
ts.calculate_returns("2020-11-25", return_period_unit="days", return_period_value=90, closest_max_days=10)
def test_rolling_returns(self):
# Yet to be written
# To-do
return True
class TestExpand:
def test_weekly_to_daily(self, create_test_data):
ts_data = create_test_data(AllFrequencies.W, 10)
ts_data = create_test_data(AllFrequencies.W, num=10)
ts = TimeSeries(ts_data, "W")
expanded_ts = ts.expand("D", "ffill")
assert len(expanded_ts) == 64
@ -282,7 +299,7 @@ class TestExpand:
assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1]
def test_weekly_to_daily_no_weekends(self, create_test_data):
ts_data = create_test_data(AllFrequencies.W, 10)
ts_data = create_test_data(AllFrequencies.W, num=10)
ts = TimeSeries(ts_data, "W")
expanded_ts = ts.expand("D", "ffill", skip_weekends=True)
assert len(expanded_ts) == 46
@ -290,7 +307,7 @@ class TestExpand:
assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1]
def test_monthly_to_daily(self, create_test_data):
ts_data = create_test_data(AllFrequencies.M, 6)
ts_data = create_test_data(AllFrequencies.M, num=6)
ts = TimeSeries(ts_data, "M")
expanded_ts = ts.expand("D", "ffill")
assert len(expanded_ts) == 152
@ -298,7 +315,7 @@ class TestExpand:
assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1]
def test_monthly_to_daily_no_weekends(self, create_test_data):
ts_data = create_test_data(AllFrequencies.M, 6)
ts_data = create_test_data(AllFrequencies.M, num=6)
ts = TimeSeries(ts_data, "M")
expanded_ts = ts.expand("D", "ffill", skip_weekends=True)
assert len(expanded_ts) == 109
@ -306,7 +323,7 @@ class TestExpand:
assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1]
def test_monthly_to_weekly(self, create_test_data):
ts_data = create_test_data(AllFrequencies.M, 6)
ts_data = create_test_data(AllFrequencies.M, num=6)
ts = TimeSeries(ts_data, "M")
expanded_ts = ts.expand("W", "ffill")
assert len(expanded_ts) == 22
@ -314,7 +331,7 @@ class TestExpand:
assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1]
def test_yearly_to_monthly(self, create_test_data):
ts_data = create_test_data(AllFrequencies.Y, 5)
ts_data = create_test_data(AllFrequencies.Y, num=5)
ts = TimeSeries(ts_data, "Y")
expanded_ts = ts.expand("M", "ffill")
assert len(expanded_ts) == 49
@ -322,6 +339,21 @@ class TestExpand:
assert expanded_ts.iloc[0][1] == expanded_ts.iloc[1][1]
class TestShrink:
# TODO: placeholder suite -- no test cases have been written yet.
pass
class TestMeanReturns:
# TODO: placeholder suite -- no test cases have been written yet.
pass
class TestReadCsv:
# TODO: placeholder suite -- no test cases have been written yet.
pass
class TestReturnsAgain:
data = [
("2020-01-01", 10),