handled issues with frequency validation

Gourav Kumar 2022-06-11 17:56:42 +05:30
parent 6c8800bef2
commit 3ffec7b11b
4 changed files with 33 additions and 17 deletions

View File

@@ -180,7 +180,7 @@ class Series(UserList):
         if len(self) != len(other):
             raise ValueError("Length of Series must be same for comparison")
-        elif (self.dtype != float and isinstance(other, Number)) or not isinstance(other, self.dtype):
+        elif self.dtype != float and isinstance(other, Number):
             raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
         return other
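The relaxed check above lets a float Series be compared against a plain number: the old clause "not isinstance(other, self.dtype)" rejected an int scalar because an int is not an instance of float, even though the comparison is meaningful. A minimal sketch of the intended behaviour, using only the Series API that appears in the test fixture at the end of this diff (the element-wise comparison result is assumed):

from pyfacts.core import Series

s1 = Series([2.5, 6.2, 5.6, 8.4])   # dtype is float

# Old check: isinstance(5, float) is False, so the validator raised
# "Cannot compare type float to int" even for this valid comparison.
# New check only rejects numeric scalars when the Series dtype is not float.
mask = s1 > 5   # expected to pass validation after this change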
@@ -300,7 +300,9 @@ class Series(UserList):
 def _validate_frequency(
-    data: List[Tuple[datetime.datetime, float]], provided_frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None
+    data: List[Tuple[datetime.datetime, float]],
+    provided_frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
+    raise_error: bool = True,
 ):
     """Checks the data and returns the expected frequency."""
     if provided_frequency is not None:
@@ -325,7 +327,10 @@ def _validate_frequency(
             expected_frequency = frequency
             break
     else:
-        raise ValueError("Data does not match any known frequency. Perhaps you have too many missing data points.")
+        if raise_error:
+            raise ValueError("Data does not match any known frequency. Perhaps you have too many missing data points.")
+        else:
+            expected_frequency = provided_frequency.symbol

     expected_data_points = expected_data_points[expected_frequency]
     if provided_frequency is None:
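With the new raise_error flag, _validate_frequency can be asked not to fail when the spacing of the data does not match any known frequency; it then falls back to the frequency the caller supplied (the .symbol access implies the provided string has already been normalised to an AllFrequencies member by this point). A hedged usage sketch with made-up, irregularly spaced data; the helper is internal, so the real call site is the TimeSeriesCore constructor shown in the next hunk:

import datetime

from pyfacts.core import _validate_frequency

data = [
    (datetime.datetime(2022, 1, 1), 100.0),
    (datetime.datetime(2022, 1, 19), 101.5),   # irregular gaps: no clean daily or weekly pattern
    (datetime.datetime(2022, 3, 7), 99.0),
]

# Default behaviour is unchanged: unrecognisable spacing raises ValueError.
# With raise_error=False the function trusts the caller instead of raising.
validation = _validate_frequency(data, provided_frequency="D", raise_error=False)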
@@ -387,7 +392,7 @@ class TimeSeriesCore:
         ts_data = _preprocess_timeseries(ts_data, date_format=date_format)
-        validation = _validate_frequency(data=ts_data, provided_frequency=frequency)
+        validation = _validate_frequency(data=ts_data, provided_frequency=frequency, raise_error=validate_frequency)
         if frequency is None:
             frequency = validation["expected_frequency"]
@@ -508,7 +513,7 @@ class TimeSeriesCore:
         """Helper function to retrieve items using a list"""
         data_to_return = [self._get_item_from_key(key) for key in date_list]
-        return self.__class__(data_to_return, frequency=self.frequency.symbol)
+        return self.__class__(data_to_return, frequency=self.frequency.symbol, validate_frequency=False)

     def _get_item_from_series(self, series: Series):
         """Helper function to retrieve item using a Series object

View File

@@ -344,8 +344,8 @@ class TimeSeries(TimeSeriesCore):
     @date_parser(1, 2)
     def calculate_rolling_returns(
         self,
-        from_date: datetime.date | str,
-        to_date: datetime.date | str,
+        from_date: datetime.date | str = None,
+        to_date: datetime.date | str = None,
         frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
         as_on_match: str = "closest",
         prior_match: str = "closest",
@@ -429,6 +429,13 @@ class TimeSeries(TimeSeriesCore):
                 frequency = getattr(AllFrequencies, frequency)
             except AttributeError:
                 raise ValueError(f"Invalid argument for frequency {frequency}")

+        if from_date is None:
+            from_date = self.start_date + relativedelta(
+                days=int(_interval_to_years(return_period_unit, return_period_value) * 365 + 1)
+            )
+        if to_date is None:
+            to_date = self.end_date
+
         dates = create_date_series(from_date, to_date, frequency.symbol)
         if frequency == AllFrequencies.D:
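With from_date and to_date now optional, the default window starts at the first date that already has a full look-back period behind it and ends at the last date in the series. A worked illustration of the default from_date computation with made-up inputs; it assumes _interval_to_years("years", 1) evaluates to 1.0:

import datetime

from dateutil.relativedelta import relativedelta

start_date = datetime.date(2020, 1, 1)   # stands in for self.start_date
years = 1.0                              # stands in for _interval_to_years(return_period_unit, return_period_value)

default_from_date = start_date + relativedelta(days=int(years * 365 + 1))
print(default_from_date)                 # 2021-01-01, the first date with a full one-year window behind it

# to_date simply falls back to self.end_date when not supplied.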

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
 import datetime
 import statistics
+from cmath import sqrt
 from typing import Literal

 from pyfacts.core import date_parser
@@ -472,13 +473,14 @@ def sortino_ratio(
     closest: Literal["previous", "next"] = "previous",
     date_format: str = None,
 ) -> float:
-    """Calculate the Sharpe ratio of any time series
+    """Calculate the Sortino ratio of any time series

-    Sharpe ratio is a measure of returns per unit of risk,
-    where risk is measured by the standard deviation of the returns.
+    Sortino ratio is a variation of the Sharpe ratio,
+    where risk is measured as standard deviation of negative returns only.
+    Since deviation on the positive side is not undesirable, hence sortino ratio excludes positive deviations.

-    The formula for Sharpe ratio is:
-    (average asset return - risk free rate)/volatility of asset returns
+    The formula for Sortino ratio is:
+    (average asset return - risk free rate)/volatility of negative asset returns

     Parameters
     ----------
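A tiny numeric illustration of the formula stated in the docstring, with made-up figures:

average_return = 0.12        # annualised asset return
risk_free_rate = 0.05
downside_deviation = 0.10    # standard deviation of negative returns, annualised

sortino = (average_return - risk_free_rate) / downside_deviation   # roughly 0.7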
@@ -528,7 +530,7 @@ def sortino_ratio(
     Returns
     -------
-    Value of Sharpe ratio as a float.
+    Value of Sortino ratio as a float.

     Raises
     ------
@@ -559,11 +561,13 @@ def sortino_ratio(
         "closest": closest,
         "date_format": date_format,
     }
-    average_rr_ts = time_series_data.calculate_rolling_returns(**common_params, annual_compounded_returns=True)
+    average_rr_ts = time_series_data.calculate_rolling_returns(**common_params, annual_compounded_returns=False)
     average_rr = statistics.mean(average_rr_ts.values)
+    annualized_average_rr = (1 + average_rr) ** (365 / interval_days) - 1

-    excess_returns = average_rr - risk_free_rate
+    excess_returns = annualized_average_rr - risk_free_rate
     sd = statistics.stdev([i for i in average_rr_ts.values if i < 0])
+    sd *= sqrt(365 / interval_days)

     sortino_ratio_value = excess_returns / sd
     return sortino_ratio_value
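The rewritten body computes simple (non-annualised) rolling returns and then annualises both sides itself: the mean return is compounded up with (1 + r) ** (365 / interval_days) - 1, and the downside deviation is scaled by the square root of time. A self-contained sketch of that arithmetic with made-up 30-day returns; math.sqrt is used here so the result stays a plain float:

import statistics
from math import sqrt

interval_days = 30
risk_free_rate = 0.05

rolling_returns = [0.010, -0.004, 0.012, -0.009, 0.007, 0.003]   # simple 30-day rolling returns

average_rr = statistics.mean(rolling_returns)
annualized_average_rr = (1 + average_rr) ** (365 / interval_days) - 1   # compound up to a yearly figure

downside = [r for r in rolling_returns if r < 0]
sd = statistics.stdev(downside) * sqrt(365 / interval_days)            # square-root-of-time scaling

sortino = (annualized_average_rr - risk_free_rate) / sd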

View File

@@ -1,6 +1,6 @@
 import datetime

-from fincal.core import Series
+from pyfacts.core import Series

 s1 = Series([2.5, 6.2, 5.6, 8.4, 7.4, 1.5, 9.6, 5])
@@ -19,7 +19,7 @@ dt_lst = [
     datetime.datetime(2020, 6, 19, 0, 0),
     datetime.datetime(2016, 3, 16, 0, 0),
     datetime.datetime(2017, 4, 25, 0, 0),
-    datetime.datetime(2016, 7, 10, 0, 0)
+    datetime.datetime(2016, 7, 10, 0, 0),
 ]

 s2 = Series(dt_lst)