PyFacts/pyfacts/statistics.py

590 lines
20 KiB
Python
Raw Normal View History

2022-06-06 03:03:58 +00:00
from __future__ import annotations
2022-04-30 07:18:31 +00:00
import datetime
2022-06-12 16:05:37 +00:00
import math
import statistics
2022-04-30 07:18:31 +00:00
from typing import Literal
2022-06-05 17:36:12 +00:00
from pyfacts.core import date_parser
2022-04-30 07:18:31 +00:00
2022-06-05 17:36:12 +00:00
from .pyfacts import TimeSeries
2022-07-24 03:16:12 +00:00
from .utils import _interval_to_years, _preprocess_from_to_date, covariance
# from dateutil.relativedelta import relativedelta
2022-04-29 02:13:06 +00:00
2022-04-30 07:18:31 +00:00
@date_parser(3, 4)
def sharpe_ratio(
    time_series_data: TimeSeries,
    risk_free_data: TimeSeries = None,
    risk_free_rate: float = None,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Calculate the Sharpe ratio of a time series.

    The Sharpe ratio expresses return earned per unit of risk, with risk
    measured as the standard deviation (volatility) of returns:

        (average asset return - risk free rate) / volatility of asset returns

    The average return is the annually compounded average rolling return of
    the series and the volatility is annualized.

    Parameters
    ----------
    time_series_data:
        The time series for which the Sharpe ratio is calculated.

    risk_free_data:
        Risk free rates as time series data. This should be the series of
        risk free returns, not the underlying asset value. Its ``mean()``
        is used as the risk free rate.

    risk_free_rate:
        Risk free rate to be used.
        Either risk_free_data or risk_free_rate needs to be provided.
        If both are provided, the time series data is used.

    from_date:
        Start date from which returns should be calculated.
        Defaults to the start date of the series plus one return period
        (converted to days at 365 days per year, rounded up).

    to_date:
        End date till which returns should be calculated.
        Defaults to the end date of the series.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The number of return_period_unit intervals over which each return
        is calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.
        Valid values are 'exact', 'previous' and 'next'.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.
        Sets the date format only for this operation.
        To set it globally, use FincalOptions.date_format

    Returns
    -------
        Value of Sharpe ratio as a float.

    Raises
    ------
    ValueError
        If neither risk free data nor risk free rate is provided.
    """

    # Length of one return period in days; used only for the from_date default.
    period_in_years = _interval_to_years(return_period_unit, return_period_value)
    period_in_days = math.ceil(period_in_years * 365)

    if from_date is None:
        # The first full rolling return is only available one period after the start.
        from_date = time_series_data.start_date + datetime.timedelta(days=period_in_days)
    if to_date is None:
        to_date = time_series_data.end_date

    # A risk free series, when given, takes precedence over a scalar rate.
    if risk_free_data is not None:
        risk_free_rate = risk_free_data.mean()
    elif risk_free_rate is None:
        raise ValueError("At least one of risk_free_data or risk_free rate is required")

    # Arguments shared by the return and volatility calculations.
    rolling_kwargs = dict(
        from_date=from_date,
        to_date=to_date,
        frequency=frequency,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        date_format=date_format,
    )

    mean_return = time_series_data.average_rolling_return(**rolling_kwargs, annual_compounded_returns=True)
    return_over_risk_free = mean_return - risk_free_rate

    annual_volatility = time_series_data.volatility(**rolling_kwargs, annualize_volatility=True)

    return return_over_risk_free / annual_volatility
@date_parser(2, 3)
def beta(
    asset_data: TimeSeries,
    market_data: TimeSeries,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Beta is a measure of sensitivity of asset returns to market returns

    The formula for beta is:
        covariance(asset returns, market returns) / variance(market returns)

    Both return series are rolling returns calculated with the same
    parameters. Returns are annually compounded only when the return period
    is longer than one year.

    Parameters
    ----------
    asset_data: TimeSeries
        The time series data of the asset

    market_data: TimeSeries
        The time series data of the relevant market index

    from_date:
        Start date from which returns should be calculated.
        Defaults to the start date of asset_data plus one return period
        (converted to days at 365 days per year, rounded up).

    to_date:
        End date till which returns should be calculated.
        Defaults to the end date of asset_data.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The number of return_period_unit intervals over which each return
        is calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.
        Valid values are 'exact', 'previous' and 'next'.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.
        Sets the date format only for this operation.
        To set it globally, use FincalOptions.date_format

    Returns
    -------
        The value of beta as a float.
    """

    period_years = _interval_to_years(return_period_unit, return_period_value)
    period_days = math.ceil(period_years * 365)
    compound_annually = period_years > 1

    if from_date is None:
        # The first full rolling return is only available one period after the start.
        from_date = asset_data.start_date + datetime.timedelta(days=period_days)
    if to_date is None:
        to_date = asset_data.end_date

    # Identical arguments for both series so the two return sets are comparable.
    rolling_kwargs = dict(
        from_date=from_date,
        to_date=to_date,
        frequency=frequency,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        date_format=date_format,
        annual_compounded_returns=compound_annually,
    )

    asset_returns = asset_data.calculate_rolling_returns(**rolling_kwargs)
    market_returns = market_data.calculate_rolling_returns(**rolling_kwargs)

    asset_market_covariance = covariance(asset_returns.values, market_returns.values)
    market_variance = statistics.variance(market_returns.values)

    return asset_market_covariance / market_variance
2022-06-04 17:03:09 +00:00
@date_parser(4, 5)
def jensens_alpha(
    asset_data: TimeSeries,
    market_data: TimeSeries,
    risk_free_data: TimeSeries = None,
    risk_free_rate: float = None,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Calculate the Jensen's alpha for a time series.

    Jensen's alpha is the return of the asset in excess of the return
    expected from its beta to the market. The formula is:

        Ri - (Rf + B x (Rm - Rf))

    where:
        Ri = Realized return of the portfolio or investment
        Rf = The risk free rate during the return time frame
        B = Beta of the portfolio or investment
        Rm = Realized return of the market index

    Ri and Rm are point-to-point returns between from_date and to_date,
    annually compounded only when that span exceeds 365 days. B is obtained
    from beta() using the rolling return parameters passed to this function.

    Parameters
    ----------
    asset_data: TimeSeries
        The time series data of the asset

    market_data: TimeSeries
        The time series data of the relevant market index

    risk_free_data:
        Risk free rates as time series data. This should be the series of
        risk free returns, not the underlying asset value. Its ``mean()``
        is used as the risk free rate.

    risk_free_rate:
        Risk free rate to be used.
        Either risk_free_data or risk_free_rate needs to be provided.
        If both are provided, the time series data is used.

    from_date:
        Start date from which returns should be calculated.
        Defaults to the start date of asset_data plus one return period
        (converted to days at 365 days per year, rounded up).

    to_date:
        End date till which returns should be calculated.
        Defaults to the end date of asset_data.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The number of return_period_unit intervals over which each rolling
        return is calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.
        Valid values are 'exact', 'previous' and 'next'.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.
        Sets the date format only for this operation.
        To set it globally, use FincalOptions.date_format

    Returns
    -------
        The value of Jensen's alpha as a float.

    Raises
    ------
    ValueError
        If neither risk free data nor risk free rate is provided.
    """

    # Validate the risk free input up front so a bad call fails before any
    # return or beta calculation is attempted.
    if risk_free_data is None and risk_free_rate is None:
        raise ValueError("At least one of risk_free_data or risk_free rate is required")
    elif risk_free_data is not None:
        # A risk free series, when given, takes precedence over a scalar rate.
        risk_free_rate = risk_free_data.mean()

    interval_years = _interval_to_years(return_period_unit, return_period_value)
    interval_days = math.ceil(interval_years * 365)

    if from_date is None:
        # The first full rolling return is only available one period after the start.
        from_date = asset_data.start_date + datetime.timedelta(days=interval_days)
    if to_date is None:
        to_date = asset_data.end_date

    # Realized returns are measured over the whole from_date..to_date span.
    num_days = (to_date - from_date).days
    compound_realised_returns = num_days > 365

    # Arguments shared by the asset and market point-to-point return calls.
    realized_return_params = {
        "as_on": to_date,
        "return_period_unit": "days",
        "return_period_value": num_days,
        "annual_compounded_returns": compound_realised_returns,
        "as_on_match": as_on_match,
        "prior_match": prior_match,
        "closest": closest,
        "date_format": date_format,
    }
    # NOTE(review): calculate_returns results are indexed with [1] below;
    # presumably it returns a (date, return) pair - confirm against TimeSeries.
    realized_return = asset_data.calculate_returns(**realized_return_params)
    market_return = market_data.calculate_returns(**realized_return_params)

    beta_value = beta(
        asset_data=asset_data,
        market_data=market_data,
        from_date=from_date,
        to_date=to_date,
        frequency=frequency,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        date_format=date_format,
    )

    # Return predicted for the asset from its beta to the market.
    expected_return = risk_free_rate + beta_value * (market_return[1] - risk_free_rate)

    # Alpha is the realized return minus the WHOLE expected return; the
    # beta-scaled market premium must be subtracted, not added.
    return realized_return[1] - expected_return
2022-06-04 17:03:09 +00:00
@date_parser(2, 3)
def correlation(
    data1: TimeSeries,
    data2: TimeSeries,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Calculate the correlation between two assets

    Correlation is not calculated directly on the asset prices. The prices
    are first converted to rolling returns, and the correlation is then
    calculated on those returns with ``statistics.correlation``. Hence this
    function requires all parameters of the rolling returns calculation.
    Returns are annually compounded only when the return period is longer
    than one year.

    Parameters
    ----------
    data1: TimeSeries
        The first time series data

    data2: TimeSeries
        The second time series data

    from_date:
        Start date from which returns should be calculated.
        Defaults to the start date of data1 plus one return period
        (converted to days at 365 days per year, rounded up).

    to_date:
        End date till which returns should be calculated.
        Defaults to the end date of data1.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The number of return_period_unit intervals over which each return
        is calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.
        Valid values are 'exact', 'previous' and 'next'.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.
        Sets the date format only for this operation.
        To set it globally, use FincalOptions.date_format

    Returns
    -------
        The value of the correlation between the two return series as a float.

    Raises
    ------
    ValueError:
        * If frequency of both TimeSeries do not match
        * If from_date is before the start of data2 or to_date is after
          the end of data2
    """

    period_years = _interval_to_years(return_period_unit, return_period_value)
    period_days = math.ceil(period_years * 365)
    compound_annually = period_years > 1

    if from_date is None:
        # The first full rolling return is only available one period after the start.
        from_date = data1.start_date + datetime.timedelta(days=period_days)
    if to_date is None:
        to_date = data1.end_date

    if not data1.frequency == data2.frequency:
        raise ValueError("Correlation calculation requires both time series to be of same frequency")

    # The requested window has to lie inside the span covered by data2.
    if not (from_date >= data2.start_date and to_date <= data2.end_date):
        raise ValueError("Data between from_date and to_date must be present in both time series")

    # Identical arguments for both series so the two return sets are comparable.
    rolling_kwargs = dict(
        from_date=from_date,
        to_date=to_date,
        frequency=frequency,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        date_format=date_format,
        annual_compounded_returns=compound_annually,
    )

    first_returns = data1.calculate_rolling_returns(**rolling_kwargs)
    second_returns = data2.calculate_rolling_returns(**rolling_kwargs)

    return statistics.correlation(first_returns.values, second_returns.values)
2022-06-06 16:40:16 +00:00
@date_parser(3, 4)
def sortino_ratio(
    time_series_data: TimeSeries,
    risk_free_data: TimeSeries = None,
    risk_free_rate: float = None,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Calculate the Sortino ratio of any time series

    Sortino ratio is a variation of the Sharpe ratio,
    where risk is measured as standard deviation of negative returns only.
    Since deviation on the positive side is not undesirable,
    the Sortino ratio excludes positive deviations.

    The formula for Sortino ratio is:
        (average asset return - risk free rate)/volatility of negative asset returns

    The average of the (non-annualized) rolling returns is annualized by
    compounding, and the standard deviation of the negative rolling returns
    is annualized by the square root of the number of periods per year.

    Parameters
    ----------
    time_series_data:
        The time series for which Sortino ratio needs to be calculated

    risk_free_data:
        Risk free rates as time series data. This should be the series of
        risk free returns, not the underlying asset value. Its ``mean()``
        is used as the risk free rate.

    risk_free_rate:
        Risk free rate to be used.
        Either risk_free_data or risk_free_rate needs to be provided.
        If both are provided, the time series data is used.

    from_date:
        Start date from which returns should be calculated.
        The effective value is resolved by ``_preprocess_from_to_date``.

    to_date:
        End date till which returns should be calculated.
        The effective value is resolved by ``_preprocess_from_to_date``.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The number of return_period_unit intervals over which each return
        is calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.
        Valid values are 'exact', 'previous' and 'next'.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.
        Sets the date format only for this operation.
        To set it globally, use FincalOptions.date_format

    Returns
    -------
        Value of Sortino ratio as a float.

    Raises
    ------
    ValueError
        If risk free data or risk free rate is not provided.
    statistics.StatisticsError
        If there are no valid rolling returns, or fewer than two negative
        rolling returns (the standard deviation is then undefined).
    """

    # Length of one return period in days; drives both annualization steps.
    interval_days = math.ceil(_interval_to_years(return_period_unit, return_period_value) * 365)

    # NOTE(review): the meaning of the positional ``False`` flag is defined by
    # _preprocess_from_to_date, which is not visible here - confirm.
    from_date, to_date = _preprocess_from_to_date(
        from_date,
        to_date,
        time_series_data,
        False,
        return_period_unit,
        return_period_value,
        as_on_match,
        prior_match,
        closest,
    )

    if risk_free_data is None and risk_free_rate is None:
        raise ValueError("At least one of risk_free_data or risk_free rate is required")
    elif risk_free_data is not None:
        # A risk free series, when given, takes precedence over a scalar rate.
        risk_free_rate = risk_free_data.mean()

    common_params = {
        "from_date": from_date,
        "to_date": to_date,
        "frequency": frequency,
        "return_period_unit": return_period_unit,
        "return_period_value": return_period_value,
        "as_on_match": as_on_match,
        "prior_match": prior_match,
        "closest": closest,
        "date_format": date_format,
    }
    rolling_returns = time_series_data.calculate_rolling_returns(
        **common_params, annual_compounded_returns=False, if_not_found="nan"
    )

    # Missing observations come back as NaN (if_not_found="nan"); drop them
    # once so both the mean and the downside deviation use clean data.
    valid_returns = [value for value in rolling_returns.values if not math.isnan(value)]

    average_rr = statistics.mean(valid_returns)
    # Compound the per-period average up to a yearly figure.
    annualized_average_rr = (1 + average_rr) ** (365 / interval_days) - 1
    excess_returns = annualized_average_rr - risk_free_rate

    # Downside risk: only returns below zero contribute to the deviation.
    negative_returns = [value for value in valid_returns if value < 0]
    sd = statistics.stdev(negative_returns)
    # Scale the per-period deviation to a yearly one.
    sd *= math.sqrt(365 / interval_days)

    return excess_returns / sd