improved expand function, handled issue with ffill

getitem returns a date-value pair, and ffill was storing that whole pair as the value instead of only the value.
This commit is contained in:
Gourav Kumar 2022-04-04 23:47:34 +05:30
parent b5aa5d22d4
commit ed973cc259

View File

@ -31,7 +31,8 @@ def create_date_series(
frequency: Literal["D", "W", "M", "Q", "H", "Y"], frequency: Literal["D", "W", "M", "Q", "H", "Y"],
eomonth: bool = False, eomonth: bool = False,
skip_weekends: bool = False, skip_weekends: bool = False,
) -> List[datetime.datetime]: ensure_coverage: bool = False,
) -> Series:
"""Create a date series with a specified frequency """Create a date series with a specified frequency
Parameters Parameters
@ -52,6 +53,13 @@ def create_date_series(
Specifies if the dates in the series should be end of month dates. Specifies if the dates in the series should be end of month dates.
Can only be used if the frequency is Monthly or lower. Can only be used if the frequency is Monthly or lower.
skip_weekends: Boolean, default False
If set to True, dates falling on weekends will not be added to the series.
Used only when frequency is daily; weekends will necessarily be included for other frequencies.
ensure_coverage: Boolean, default False
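If set to True, the end date is extended where needed so that the series is not cut short of it. Currently this only applies to daily frequency with skip_weekends=True: an end date falling on a weekend is moved forward to the following Monday.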
Returns Returns
------- -------
List[datetime.datetime] List[datetime.datetime]
@ -67,6 +75,13 @@ def create_date_series(
if eomonth and frequency.days < AllFrequencies.M.days: if eomonth and frequency.days < AllFrequencies.M.days:
raise ValueError(f"eomonth cannot be set to True if frequency is higher than {AllFrequencies.M.name}") raise ValueError(f"eomonth cannot be set to True if frequency is higher than {AllFrequencies.M.name}")
if ensure_coverage:
if frequency.days == 1 and skip_weekends and end_date.weekday() > 4:
extend_by_days = 7 - end_date.weekday()
end_date += relativedelta(days=extend_by_days)
# To-do: Add code to ensure coverage for other frequencies as well
datediff = (end_date - start_date).days / frequency.days + 1 datediff = (end_date - start_date).days / frequency.days + 1
dates = [] dates = []
@ -160,7 +175,7 @@ class TimeSeries(TimeSeriesCore):
cur_val = self.get(cur_date, closest="previous") cur_val = self.get(cur_date, closest="previous")
except KeyError: except KeyError:
pass pass
new_ts.update({cur_date: cur_val}) new_ts.update({cur_date: cur_val[1]})
if inplace: if inplace:
self.data = new_ts self.data = new_ts
@ -566,7 +581,7 @@ class TimeSeries(TimeSeriesCore):
def expand( def expand(
self, self,
to_frequency: Literal["D", "W", "M", "Q", "H"], to_frequency: Literal["D", "W", "M", "Q", "H"],
method: Literal["ffill", "bfill", "interpolate"], method: Literal["ffill", "bfill"],
skip_weekends: bool = False, skip_weekends: bool = False,
) -> TimeSeries: ) -> TimeSeries:
try: try:
@ -578,17 +593,16 @@ class TimeSeries(TimeSeriesCore):
raise ValueError("TimeSeries can be only expanded to a higher frequency") raise ValueError("TimeSeries can be only expanded to a higher frequency")
new_dates = create_date_series( new_dates = create_date_series(
self.start_date, self.end_date, frequency=to_frequency.symbol, skip_weekends=skip_weekends self.start_date,
self.end_date,
frequency=to_frequency.symbol,
skip_weekends=skip_weekends,
ensure_coverage=True,
) )
new_ts: dict = {dt: self.get(dt, closest="previous")[1] for dt in new_dates}
output_ts: TimeSeries = TimeSeries(new_ts, frequency=to_frequency.symbol)
if method == "ffill": closest = "previous" if method == "ffill" else "next"
output_ts.ffill(inplace=True, skip_weekends=skip_weekends) new_ts: dict = {dt: self.get(dt, closest=closest)[1] for dt in new_dates}
elif method == "bfill": output_ts: TimeSeries = TimeSeries(new_ts, frequency=to_frequency.symbol)
output_ts.bfill(inplace=True, skip_weekends=skip_weekends)
else:
raise NotImplementedError(f"Method {method} not implemented")
return output_ts return output_ts