Compare commits
179 Commits
03283f7ed4
...
master
Author | SHA1 | Date | |
---|---|---|---|
173fba0f03 | |||
a4ecd38b97 | |||
3e5875b873 | |||
2f50894b46 | |||
|
06be27d46c | ||
c453ff20e5 | |||
a455cdfc65 | |||
f317a93bfe | |||
|
14afb1400a | ||
0bab00f455 | |||
56e8017de7 | |||
7108fa2a56 | |||
6cf56ddf11 | |||
3a5ca91234 | |||
db8f73d5c6 | |||
7b65f6ff3f | |||
18b60bd608 | |||
0fec9abac0 | |||
c35bc35529 | |||
c4e1d8b586 | |||
db8377f0ef | |||
583ca98e51 | |||
b1305ca89d | |||
ef68ae0293 | |||
cae704b658 | |||
a69f3e495e | |||
40429fc70a | |||
c7e955f91e | |||
d0c087c3bf | |||
2367d1aef8 | |||
31abaa4052 | |||
d229c9cf2d | |||
a8b90182da | |||
48e47e34a8 | |||
469c421639 | |||
3bc7e7b496 | |||
a395f7d98d | |||
56baf83a77 | |||
8c159062f5 | |||
371b319e9d | |||
a0499ca157 | |||
33c56d8f6c | |||
e450395ad0 | |||
3ffec7b11b | |||
6c8800bef2 | |||
f46ebaa8a9 | |||
e9bb795ecf | |||
569f20709b | |||
c713e3283b | |||
0bf1deac48 | |||
c605f71f10 | |||
a6fcd29a34 | |||
8117986742 | |||
da2993ebf0 | |||
f41b9c7519 | |||
7504c840eb | |||
1682fe12cc | |||
177e3bc4c8 | |||
922fe0f027 | |||
38fb9ca7d0 | |||
0a113fdd8a | |||
9a71cdf355 | |||
66ad448516 | |||
49cebecb88 | |||
da0bfcbcb1 | |||
cad069d351 | |||
130f4e58e9 | |||
2ca6167c8b | |||
95e9bfd51c | |||
5512a647ad | |||
7e524ccf7a | |||
aea6bf9b57 | |||
68d854cb3f | |||
0d0b2121a3 | |||
2a8f5b4041 | |||
19523519ee | |||
41562f7e70 | |||
3189e50bd8 | |||
336cf41ca8 | |||
0f002f3478 | |||
79cd44d41f | |||
978566e0a8 | |||
c99ffe02d0 | |||
65f2e8434c | |||
e8be7e9efa | |||
49604a5ae9 | |||
b38a317b82 | |||
03a8045400 | |||
625c9228e9 | |||
3ec5b06e83 | |||
e8dbc16157 | |||
b246709603 | |||
09365c7957 | |||
7bcd310652 | |||
2ee8f928af | |||
d4bbaf7903 | |||
83a8b05802 | |||
123cc0b204 | |||
6547a5e64a | |||
22c295ece1 | |||
30749382cf | |||
88e7b3994b | |||
8e8146a07d | |||
ce6326f4b5 | |||
e06626dbca | |||
33feb62015 | |||
ed973cc259 | |||
b5aa5d22d4 | |||
03ccbe0cb1 | |||
fa2ab84c92 | |||
6ffa52f84e | |||
d88acc5888 | |||
eb63766c1e | |||
faf0b44e46 | |||
875089349e | |||
3b96d231f4 | |||
8be3359437 | |||
ee48446c6e | |||
b34c14d778 | |||
ff865cb2b9 | |||
d24b0d8bb2 | |||
1a5518e62a | |||
b2a4d73c59 | |||
a19f7e7b21 | |||
2237692677 | |||
1758df0124 | |||
c992905bf6 | |||
97731b4c12 | |||
810e0bfb65 | |||
d7b06fbe24 | |||
ce5540e26b | |||
6bbdac35ec | |||
f00305771b | |||
c481e2b786 | |||
d757479cca | |||
793d5b1ad7 | |||
7b541290c6 | |||
24d5d253b5 | |||
17b3e348a2 | |||
32e4f25f59 | |||
60f25de710 | |||
347634f536 | |||
eeda4064b2 | |||
00ac66219b | |||
ad68dcd930 | |||
cad4e1f45c | |||
0cbfede7b6 | |||
336276cf4b | |||
c9bfa485f5 | |||
7cac5cc307 | |||
ef2973a1d1 | |||
d1f9e3924f | |||
1be38ce7d4 | |||
77845ff501 | |||
804609ac0e | |||
870d0bd96b | |||
9e4049a973 | |||
f4e7840f28 | |||
dfd718e106 | |||
|
50c423611d | ||
|
308a4f1abb | ||
|
0fbca4ae4c | ||
|
66ccd2a3f8 | ||
|
d9ec9b508b | ||
23882b2380 | |||
3bf4ca9c61 | |||
|
941dbb36b3 | ||
|
21de3397a3 | ||
|
8bdd8922cb | ||
9707c095b0 | |||
306a2d730c | |||
371f7e2279 | |||
a39b4fa606 | |||
15b0b96488 | |||
3aa38d31d1 | |||
6c006cb6a4 | |||
b4d5291572 | |||
9ddf70fc61 | |||
04176646b6 |
7
.gitignore
vendored
7
.gitignore
vendored
@ -2,4 +2,9 @@
|
||||
.eggs
|
||||
.env
|
||||
*egg-info
|
||||
__pycache__
|
||||
__pycache__
|
||||
.vscode
|
||||
.idea
|
||||
build
|
||||
.coverage
|
||||
.DS_store
|
@ -1,15 +0,0 @@
|
||||
# Fincal
|
||||
This module simplified handling of time-series data
|
||||
|
||||
## The problem
|
||||
Time series data often have missing data points. These missing points mess things up when you are trying to do a comparison between two sections of a time series.
|
||||
|
||||
To make things worse, most libraries don't allow comparison based on dates. Month to Month and year to year comparisons become difficult as they cannot be translated into number of days. However, these are commonly used metrics while looking at financial data.
|
||||
|
||||
## The Solution
|
||||
Fincal aims to simplify things by allowing you to:
|
||||
* Compare time-series data based on dates
|
||||
* Easy way to work around missing dates by taking the closest data points
|
||||
* Completing series with missing data points using forward fill and backward fill
|
||||
|
||||
## Examples
|
@ -1,129 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "3f7938c0-98e3-43b8-86e8-4f000cda7ce5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import datetime\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"from fincal.fincal import TimeSeries\n",
|
||||
"from fincal.core import Series"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "757eafc2-f804-4e7e-a3b8-2d09cd62e646",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dfd = pd.read_csv('test_files/nav_history_daily - copy.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "59b3d4a9-8ef4-4652-9e20-1bac69ab4ff9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "4bc95ae0-8c33-4eab-acf9-e765d22979b8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Warning: The input data contains duplicate dates which have been ignored.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "f2c3218c-3984-43d6-8638-41a74a9d0b58",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"TimeSeries([(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n",
|
||||
"\t (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n",
|
||||
"\t (datetime.datetime(2013, 1, 4, 0, 0), 19.008)\n",
|
||||
"\t ...\n",
|
||||
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
|
||||
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
|
||||
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "TypeError",
|
||||
"evalue": "getattr(): attribute name must be string",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
|
||||
"File \u001b[1;32m<timed eval>:1\u001b[0m, in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n",
|
||||
"File \u001b[1;32mD:\\Documents\\Projects\\fincal\\fincal\\fincal.py:203\u001b[0m, in \u001b[0;36mTimeSeries.calculate_rolling_returns\u001b[1;34m(self, from_date, to_date, frequency, as_on_match, prior_match, closest, compounding, years)\u001b[0m\n\u001b[0;32m 200\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n\u001b[0;32m 201\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid argument for frequency \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfrequency\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 203\u001b[0m dates \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_date_series\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfrom_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mto_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 204\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m frequency \u001b[38;5;241m==\u001b[39m AllFrequencies\u001b[38;5;241m.\u001b[39mD:\n\u001b[0;32m 205\u001b[0m dates \u001b[38;5;241m=\u001b[39m [i \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m dates \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtime_series]\n",
|
||||
"File \u001b[1;32mD:\\Documents\\Projects\\fincal\\fincal\\fincal.py:16\u001b[0m, in \u001b[0;36mcreate_date_series\u001b[1;34m(start_date, end_date, frequency, eomonth)\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate_date_series\u001b[39m(\n\u001b[0;32m 12\u001b[0m start_date: datetime\u001b[38;5;241m.\u001b[39mdatetime, end_date: datetime\u001b[38;5;241m.\u001b[39mdatetime, frequency: \u001b[38;5;28mstr\u001b[39m, eomonth: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 13\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[datetime\u001b[38;5;241m.\u001b[39mdatetime]:\n\u001b[0;32m 14\u001b[0m \u001b[38;5;124;03m\"\"\"Creates a date series using a frequency\"\"\"\u001b[39;00m\n\u001b[1;32m---> 16\u001b[0m frequency \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mAllFrequencies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m eomonth \u001b[38;5;129;01mand\u001b[39;00m frequency\u001b[38;5;241m.\u001b[39mdays \u001b[38;5;241m<\u001b[39m AllFrequencies\u001b[38;5;241m.\u001b[39mM\u001b[38;5;241m.\u001b[39mdays:\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meomonth cannot be set to True if frequency is higher than \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mAllFrequencies\u001b[38;5;241m.\u001b[39mM\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||
"\u001b[1;31mTypeError\u001b[0m: getattr(): attribute name must be string"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"ts.calculate_rolling_returns(from_date='2020-01-01', to_date='2021-01-01')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
256
README.md
256
README.md
@ -1,15 +1,253 @@
|
||||
# Fincal
|
||||
This module simplified handling of time-series data
|
||||
# PyFacts
|
||||
|
||||
PyFacts stands for Python library for Financial analysis and computations on time series. It is a library which makes it simple to work with time series data.
|
||||
|
||||
Most libraries, and languages like SQL, work with rows. Operations are performed by rows and not by dates. For instance, to calculate 1-year rolling returns in SQL, you are forced to use either a lag of 365/252 rows, leading to an approximation, or slow and cumbersome joins. PyFacts solves this by allowing you to work with dates and time intervals. Hence, to calculate 1-year returns, you will be specifying a lag of 1-year and the library will do the grunt work of finding the most appropriate observations to calculate these returns on.
|
||||
|
||||
## The problem
|
||||
Time series data often have missing data points. These missing points mess things up when you are trying to do a comparison between two sections of a time series.
|
||||
|
||||
To make things worse, most libraries don't allow comparison based on dates. Month to Month and year to year comparisons become difficult as they cannot be translated into number of days. However, these are commonly used metrics while looking at financial data.
|
||||
Libraries and languages usually don't allow comparison based on dates. Calculating month-on-month or year-on-year returns is always cumbersome as users are forced to rely on row lags. However, data always have inconsistencies, especially financial data. Markets don't work on weekends, there are off days, data doesn't get released on a few days a year, and data availability is patchy when dealing with 40-year-old data. All these problems are exacerbated when you are forced to make calculations using lag.
|
||||
|
||||
## The Solution
|
||||
Fincal aims to simplify things by allowing you to:
|
||||
* Compare time-series data based on dates
|
||||
* Easy way to work around missing dates by taking the closest data points
|
||||
* Completing series with missing data points using forward fill and backward fill
|
||||
|
||||
## Examples
|
||||
PyFacts aims to simplify things by allowing you to:
|
||||
|
||||
- Compare time-series data based on dates and time-period-based lag
|
||||
- Easy way to work around missing dates by taking the closest data points
|
||||
- Completing series with missing data points using forward fill and backward fill
|
||||
- Use friendly dates everywhere written as a simple string
|
||||
|
||||
## Creating a time series
|
||||
|
||||
Time series data can be created from a dictionary, a list of lists/tuples/dicts, or by reading a csv file.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
>>> import pyfacts as pft
|
||||
|
||||
>>> time_series_data = [
|
||||
... ('2021-01-01', 10),
|
||||
... ('2021-02-01', 12),
|
||||
... ('2021-03-01', 14),
|
||||
... ('2021-04-01', 16),
|
||||
... ('2021-05-01', 18),
|
||||
... ('2021-06-01', 20)
|
||||
...]
|
||||
|
||||
>>> ts = pft.TimeSeries(time_series_data)
|
||||
```
|
||||
|
||||
### Sample usage
|
||||
|
||||
```
|
||||
>>> ts.calculate_returns(as_on='2021-04-01', return_period_unit='months', return_period_value=3, annual_compounded_returns=False)
|
||||
(datetime.datetime(2021, 4, 1, 0, 0), 0.6)
|
||||
|
||||
>>> ts.calculate_returns(as_on='2021-04-15', return_period_unit='months', return_period_value=3, annual_compounded_returns=False)
|
||||
(datetime.datetime(2021, 4, 1, 0, 0), 0.6)
|
||||
```
|
||||
|
||||
### Working with dates
|
||||
|
||||
With PyFacts, you never have to go through the hassle of creating datetime objects for your time series. PyFacts will parse any date passed to it as a string. The default format is ISO format, i.e., YYYY-MM-DD. However, you can use your preferred format simply by specifying it in the options, in a datetime-library-compatible format, after importing the library. For example, to use DD-MM-YYYY format:
|
||||
|
||||
```
|
||||
>>> import pyfacts as pft
|
||||
>>> pft.PyfactsOptions.date_format = '%d-%m-%Y'
|
||||
```
|
||||
|
||||
Now the library will automatically parse all dates as DD-MM-YYYY
|
||||
|
||||
If you happen to have any one situation where you need to use a different format, all methods accept a date_format parameter to override the default.
|
||||
|
||||
### Working with multiple time series
|
||||
|
||||
While working with time series data, you will often need to perform calculations on the data. PyFacts supports all kinds of mathematical operations on time series.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
>>> import pyfacts as pft
|
||||
|
||||
>>> time_series_data = [
|
||||
... ('2021-01-01', 10),
|
||||
... ('2021-02-01', 12),
|
||||
... ('2021-03-01', 14),
|
||||
... ('2021-04-01', 16),
|
||||
... ('2021-05-01', 18),
|
||||
... ('2021-06-01', 20)
|
||||
...]
|
||||
|
||||
>>> ts = pft.TimeSeries(time_series_data)
|
||||
>>> print(ts/100)
|
||||
|
||||
TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 0.1),
|
||||
(datetime.datetime(2022, 1, 2, 0, 0), 0.12),
|
||||
(datetime.datetime(2022, 1, 3, 0, 0), 0.14),
|
||||
(datetime.datetime(2022, 1, 4, 0, 0), 0.16),
|
||||
(datetime.datetime(2022, 1, 6, 0, 0), 0.18),
|
||||
(datetime.datetime(2022, 1, 7, 0, 0), 0.2)], frequency='M')
|
||||
```
|
||||
|
||||
Mathematical operations can also be done between time series as long as they have the same dates.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
>>> import pyfacts as pft
|
||||
|
||||
>>> time_series_data = [
|
||||
... ('2021-01-01', 10),
|
||||
... ('2021-02-01', 12),
|
||||
... ('2021-03-01', 14),
|
||||
... ('2021-04-01', 16),
|
||||
... ('2021-05-01', 18),
|
||||
... ('2021-06-01', 20)
|
||||
...]
|
||||
|
||||
>>> ts = pft.TimeSeries(time_series_data)
|
||||
>>> ts2 = pft.TimeSeries(time_series_data)
|
||||
>>> print(ts/ts2)
|
||||
|
||||
TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 1.0),
|
||||
(datetime.datetime(2022, 1, 2, 0, 0), 1.0),
|
||||
(datetime.datetime(2022, 1, 3, 0, 0), 1.0),
|
||||
(datetime.datetime(2022, 1, 4, 0, 0), 1.0),
|
||||
(datetime.datetime(2022, 1, 6, 0, 0), 1.0),
|
||||
(datetime.datetime(2022, 1, 7, 0, 0), 1.0)], frequency='M')
|
||||
```
|
||||
|
||||
However, if the dates are not in sync, PyFacts provides convenience methods for synchronising dates.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
>>> import pyfacts as pft
|
||||
|
||||
>>> data1 = [
|
||||
... ('2021-01-01', 10),
|
||||
... ('2021-02-01', 12),
|
||||
... ('2021-03-01', 14),
|
||||
... ('2021-04-01', 16),
|
||||
... ('2021-05-01', 18),
|
||||
... ('2021-06-01', 20)
|
||||
...]
|
||||
|
||||
>>> data2 = [
|
||||
... ("2022-15-01", 20),
|
||||
... ("2022-15-02", 22),
|
||||
... ("2022-15-03", 24),
|
||||
... ("2022-15-04", 26),
|
||||
... ("2022-15-06", 28),
|
||||
... ("2022-15-07", 30)
|
||||
...]
|
||||
|
||||
>>> ts = pft.TimeSeries(data1, frequency='M', date_format='%Y-%d-%m')
|
||||
>>> ts2 = pft.TimeSeries(data2, frequency='M', date_format='%Y-%d-%m')
|
||||
>>> ts.sync(ts2, fill_method='bfill') # Sync ts2 with ts1
|
||||
|
||||
TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 20.0),
|
||||
(datetime.datetime(2022, 2, 1, 0, 0), 22.0),
|
||||
(datetime.datetime(2022, 3, 1, 0, 0), 24.0),
|
||||
(datetime.datetime(2022, 4, 1, 0, 0), 26.0),
|
||||
(datetime.datetime(2022, 6, 1, 0, 0), 28.0),
|
||||
(datetime.datetime(2022, 7, 1, 0, 0), 30.0)], frequency='M')
|
||||
```
|
||||
|
||||
Even if you need to perform calculations on data with different frequencies, PyFacts will let you easily handle this with the expand and shrink methods.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
>>> data = [
|
||||
... ("2022-01-01", 10),
|
||||
... ("2022-02-01", 12),
|
||||
... ("2022-03-01", 14),
|
||||
... ("2022-04-01", 16),
|
||||
... ("2022-05-01", 18),
|
||||
... ("2022-06-01", 20)
|
||||
...]
|
||||
|
||||
>>> ts = pft.TimeSeries(data, 'M')
|
||||
>>> ts.expand(to_frequency='W', method='ffill')
|
||||
|
||||
TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 10.0),
|
||||
(datetime.datetime(2022, 1, 8, 0, 0), 10.0),
|
||||
(datetime.datetime(2022, 1, 15, 0, 0), 10.0)
|
||||
...
|
||||
(datetime.datetime(2022, 5, 14, 0, 0), 18.0),
|
||||
(datetime.datetime(2022, 5, 21, 0, 0), 18.0),
|
||||
(datetime.datetime(2022, 5, 28, 0, 0), 18.0)], frequency='W')
|
||||
|
||||
>>> ts.shrink(to_frequency='Q', method='ffill')
|
||||
|
||||
TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 10.0),
|
||||
(datetime.datetime(2022, 4, 1, 0, 0), 16.0)], frequency='Q')
|
||||
```
|
||||
|
||||
If you want to shorten the timeframe of the data with an aggregation function, the transform method will help you out. Currently it supports sum and mean.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
>>> data = [
|
||||
... ("2022-01-01", 10),
|
||||
... ("2022-02-01", 12),
|
||||
... ("2022-03-01", 14),
|
||||
... ("2022-04-01", 16),
|
||||
... ("2022-05-01", 18),
|
||||
... ("2022-06-01", 20),
|
||||
... ("2022-07-01", 22),
|
||||
... ("2022-08-01", 24),
|
||||
... ("2022-09-01", 26),
|
||||
... ("2022-10-01", 28),
|
||||
... ("2022-11-01", 30),
|
||||
... ("2022-12-01", 32)
|
||||
...]
|
||||
|
||||
>>> ts = pft.TimeSeries(data, 'M')
|
||||
>>> ts.transform(to_frequency='Q', method='sum')
|
||||
|
||||
TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 36.0),
|
||||
(datetime.datetime(2022, 4, 1, 0, 0), 54.0),
|
||||
(datetime.datetime(2022, 7, 1, 0, 0), 72.0),
|
||||
(datetime.datetime(2022, 10, 1, 0, 0), 90.0)], frequency='Q')
|
||||
|
||||
>>> ts.transform(to_frequency='Q', method='mean')
|
||||
|
||||
TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 12.0),
|
||||
(datetime.datetime(2022, 4, 1, 0, 0), 18.0),
|
||||
(datetime.datetime(2022, 7, 1, 0, 0), 24.0),
|
||||
(datetime.datetime(2022, 10, 1, 0, 0), 30.0)], frequency='Q')
|
||||
```
|
||||
|
||||
## To-do
|
||||
|
||||
### Core features
|
||||
|
||||
- [x] Add `__setitem__`
|
||||
- [ ] Create empty TimeSeries object
|
||||
- [x] Read from CSV
|
||||
- [ ] Write to CSV
|
||||
- [x] Convert to dict
|
||||
- [x] Convert to list of tuples
|
||||
|
||||
### pyfacts features
|
||||
|
||||
- [x] Sync two TimeSeries
|
||||
- [x] Average rolling return
|
||||
- [x] Sharpe ratio
|
||||
- [x] Jensen's Alpha
|
||||
- [x] Beta
|
||||
- [x] Sortino ratio
|
||||
- [x] Correlation & R-squared
|
||||
- [ ] Treynor ratio
|
||||
- [x] Max drawdown
|
||||
- [ ] Moving average
|
||||
|
||||
### Pending implementation
|
||||
|
||||
- [x] Use limit parameter in ffill and bfill
|
||||
- [x] Implementation of ffill and bfill may be incorrect inside expand, check and correct
|
||||
- [ ] Implement interpolation in expand
|
||||
|
29
dict_iter.py
29
dict_iter.py
@ -1,29 +0,0 @@
|
||||
import pandas
|
||||
|
||||
from fincal.fincal import TimeSeries
|
||||
|
||||
# Scratch script: load NAV history from CSV files and experiment with dict iteration.
dfd = pandas.read_csv('test_files/nav_history_daily - Copy.csv')
dfm = pandas.read_csv('test_files/nav_history_monthly.csv')

# Daily (date, nav) pairs for amfi_code 118825, sorted as tuples.
data_d = sorted((i.date, i.nav) for i in dfd.itertuples() if i.amfi_code == 118825)
data_m = [{'date': i.date, 'value': i.nav} for i in dfm.itertuples()]

tsd = TimeSeries(data_d, frequency='D')

md = dict(data_d)

# Print keys from the front of the dict, stopping once the counter reaches 5.
counter = 1
for i in md:
    print(i)
    counter += 1
    if counter >= 5:
        break

print('\n')

# Same walk, but from the back of the dict.
counter = 1
for i in reversed(md):
    print('rev', i)
    counter += 1
    if counter >= 5:
        break

# NOTE(review): next() is applied to each key, not to an iterator; presumably this
# raises TypeError unless the keys are themselves iterators -- confirm the intent.
x = [next(i) for i in md]
print(x)
|
@ -1 +0,0 @@
|
||||
from fincal import *
|
@ -1,20 +0,0 @@
|
||||
import sys
|
||||
|
||||
|
||||
def main(args=None):
    """Placeholder entry point that reports how the script was invoked.

    Parameters
    ----------
    args : list, optional
        Command-line arguments. Falls back to ``sys.argv[1:]`` when omitted.
        The value is not used yet; the printed details come from ``sys.argv``.
    """
    args = sys.argv[1:] if args is None else args

    for banner_line in ("This is the main routine.", "It should do something interesting."):
        print(banner_line)

    argv = sys.argv
    print("This is the name of the script: ", argv[0])
    print("Number of arguments: ", len(argv))
    print("The arguments are: ", str(argv))

    # Do argument parsing here with argparse


if __name__ == "__main__":
    main()
|
363
fincal/core.py
363
fincal/core.py
@ -1,363 +0,0 @@
|
||||
import datetime
|
||||
from collections import UserDict, UserList
|
||||
from dataclasses import dataclass
|
||||
from numbers import Number
|
||||
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Type, Union
|
||||
|
||||
|
||||
@dataclass
class FincalOptions:
    """Library-wide default settings, read as class attributes."""

    # strptime-compatible format used by _parse_date when no explicit format is given
    date_format: str = "%Y-%m-%d"

    # default matching direction; the original inline note lists "after" as the alternative
    closest: str = "before"  # after
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Frequency:
    """Immutable record describing one sampling frequency of a time series."""

    name: str  # descriptive label, e.g. "daily"
    freq_type: str  # unit the step is expressed in, e.g. "days", "months", "years"
    value: int  # number of freq_type units in one step
    days: int  # length of one step expressed in days
    symbol: str  # short code for the frequency, e.g. "D"
|
||||
|
||||
|
||||
class AllFrequencies:
    """Namespace of the predefined Frequency constants, one attribute per symbol."""

    D = Frequency(name="daily", freq_type="days", value=1, days=1, symbol="D")
    W = Frequency(name="weekly", freq_type="days", value=7, days=7, symbol="W")
    M = Frequency(name="monthly", freq_type="months", value=1, days=30, symbol="M")
    Q = Frequency(name="quarterly", freq_type="months", value=3, days=91, symbol="Q")
    H = Frequency(name="half-yearly", freq_type="months", value=6, days=182, symbol="H")
    Y = Frequency(name="annual", freq_type="years", value=1, days=365, symbol="Y")
|
||||
|
||||
|
||||
def _preprocess_timeseries(
    data: Union[
        Sequence[Iterable[Union[str, datetime.datetime, float]]],
        Sequence[Mapping[str, Union[float, datetime.datetime]]],
        Sequence[Mapping[Union[str, datetime.datetime], float]],
        Mapping[Union[str, datetime.datetime], float],
    ],
    date_format: str,
) -> List[Tuple[datetime.datetime, float]]:
    """Convert any supported input shape into a sorted list of (date, value) tuples.

    Accepted shapes:

    * a sequence of ``(date, value)`` rows, where date is a string or a datetime
    * a sequence of two-key mappings, whose values are taken in order as (date, value)
    * a sequence of single-key mappings of the form ``{date: value}``
    * a single mapping of ``{date: value}``

    The shape is decided by inspecting the first row only.

    Parameters
    ----------
    data
        The raw time series data in one of the shapes above.
    date_format : str
        ``strptime`` format used when the dates are strings.

    Returns
    -------
    List[Tuple[datetime.datetime, float]]
        The rows sorted in ascending tuple order. Empty input gives an empty list.

    Raises
    ------
    TypeError
        If the data is not in one of the supported shapes.
    """

    if isinstance(data, Sequence):
        # There is no first row to inspect, so an empty input is an empty series.
        if not data:
            return []

        first_row = data[0]
        if isinstance(first_row, Mapping):
            key_count = len(first_row)
            if key_count == 2:
                rows = [tuple(row.values()) for row in data]
            elif key_count == 1:
                # tuple(*items) unpacks the single (key, value) pair of each mapping
                rows = [tuple(*row.items()) for row in data]
            else:
                raise TypeError("Could not parse the data")
            current_data = _preprocess_timeseries(rows, date_format)

        elif isinstance(first_row, Sequence):
            if isinstance(first_row[0], str):
                current_data = [(datetime.datetime.strptime(row[0], date_format), row[1]) for row in data]
            elif isinstance(first_row[0], datetime.datetime):
                current_data = [(date, value) for date, value in data]
            else:
                raise TypeError("Could not parse the data")
        else:
            raise TypeError("Could not parse the data")

    elif isinstance(data, Mapping):
        current_data = _preprocess_timeseries(list(data.items()), date_format)

    else:
        raise TypeError("Could not parse the data")

    current_data.sort()
    return current_data
|
||||
|
||||
|
||||
def _preprocess_match_options(
    as_on_match: str, prior_match: str, closest: str
) -> Tuple[datetime.timedelta, datetime.timedelta]:
    """Validate the match options and translate them into day offsets.

    Parameters
    ----------
    as_on_match : str
        One of "exact", "previous", "next" or "closest".
    prior_match : str
        One of "exact", "previous", "next" or "closest".
    closest : str
        One of "exact", "previous" or "next". It replaces any match option
        given as "closest".

    Returns
    -------
    Tuple[datetime.timedelta, datetime.timedelta]
        ``(as_on_delta, prior_delta)``: 0 days for "exact", -1 day for
        "previous" and +1 day for "next".

    Raises
    ------
    ValueError
        If any of the three arguments is not a recognised option.
    """

    deltas = {"exact": 0, "previous": -1, "next": 1}
    if closest not in deltas:
        raise ValueError(f"Invalid closest argument: {closest}")

    # "closest" is an alias that resolves to whatever the closest argument names.
    as_on_match = closest if as_on_match == "closest" else as_on_match
    prior_match = closest if prior_match == "closest" else prior_match

    if as_on_match not in deltas:
        raise ValueError(f"Invalid as_on_match argument: {as_on_match}")
    if prior_match not in deltas:
        raise ValueError(f"Invalid prior_match argument: {prior_match}")

    as_on_delta = datetime.timedelta(days=deltas[as_on_match])
    prior_delta = datetime.timedelta(days=deltas[prior_match])
    return as_on_delta, prior_delta
|
||||
|
||||
|
||||
def _parse_date(
    date: Union[str, datetime.datetime, datetime.date], date_format: Union[str, None] = None
) -> datetime.datetime:
    """Parse *date* into a ``datetime.datetime``, raising ValueError on failure.

    Parameters
    ----------
    date : str, datetime.datetime or datetime.date
        The value to convert. Date and datetime objects are converted through
        their ordinal, so any time-of-day component is dropped.
    date_format : str, optional
        ``strptime`` format for string input. Defaults to
        ``FincalOptions.date_format`` when not given.

    Returns
    -------
    datetime.datetime

    Raises
    ------
    ValueError
        If *date* is not a string or date object, or if the string does not
        match the format. The underlying error is attached as ``__cause__``.
    """

    if isinstance(date, (datetime.datetime, datetime.date)):
        # Round-tripping through the ordinal keeps only the calendar date.
        return datetime.datetime.fromordinal(date.toordinal())

    if date_format is None:
        date_format = FincalOptions.date_format

    try:
        return datetime.datetime.strptime(date, date_format)
    except TypeError as err:
        raise ValueError("Date does not seem to be valid date-like string") from err
    except ValueError as err:
        raise ValueError(
            "Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?"
        ) from err
|
||||
|
||||
|
||||
class _IndexSlicer:
    """Positional (integer or slice) access to the ``time_series`` mapping of a parent object."""

    def __init__(self, parent_obj):
        self.parent = parent_obj

    def __getitem__(self, n):
        """Return the (key, value) pair(s) found at position(s) *n*.

        A selection of exactly one item is returned as a bare tuple;
        anything else is returned as a list of tuples.
        """
        series = self.parent.time_series
        ordered_keys = list(series)
        selected = [ordered_keys[n]] if isinstance(n, int) else ordered_keys[n]
        pairs = [(key, series[key]) for key in selected]
        return pairs[0] if len(pairs) == 1 else pairs
|
||||
|
||||
|
||||
class Series(UserList):
    """Container for a series of objects, all objects must be of the same type.

    Comparison operators work element-wise and return a boolean ``Series``
    instead of a single bool.
    """

    def __init__(
        self,
        data,
        data_type: Union[Type[bool], Type[float], Type[str], Type[datetime.datetime]],
        date_format: str = None,
    ):
        """Validate *data* against *data_type* and store it.

        Parameters
        ----------
        data : Sequence
            The items of the series.
        data_type : type
            Type every item must be an instance of. Stored as ``dtype``.
        date_format : str, optional
            Format passed to ``_parse_date`` when ``data_type`` is ``str``.

        Raises
        ------
        TypeError
            If *data* is not a Sequence.
        Exception
            If any item is not an instance of *data_type*.
        """
        self.dtype = data_type
        if not isinstance(data, Sequence):
            raise TypeError("Series object can only be created using Sequence types")

        if not all(isinstance(item, data_type) for item in data):
            raise Exception("All arguments must be of the same type")

        if data_type == str:
            # String items are treated as dates and parsed; dtype itself stays str.
            data = [_parse_date(item, date_format) for item in data]

        self.data = data

    def __repr__(self):
        return f"{self.__class__.__name__}({self.data})"

    def __getitem__(self, i):
        """Return a new Series for a slice, or the bare item for an index."""
        if isinstance(i, slice):
            return self.__class__(self.data[i], self.dtype)
        return self.data[i]

    def _accepts(self, other) -> bool:
        """Tell whether *other* can be compared against the items of this series."""
        return self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype)

    def _parse_if_date_string(self, other):
        """Parse *other* when it is a string being compared against a datetime series.

        Without this step a datetime series could never be compared against a
        string, so parsing only turns a previously failing comparison into a
        working one.
        """
        if self.dtype == datetime.datetime and isinstance(other, str):
            return _parse_date(other)
        return other

    def __gt__(self, other):
        """Element-wise ``>``. Date-like values of *other* are parsed first."""
        if self.dtype == bool:
            raise TypeError("> not supported for boolean series")

        if isinstance(other, (str, datetime.datetime, datetime.date)):
            other = _parse_date(other)

        if not self._accepts(other):
            raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")

        return Series([item > other for item in self.data], bool)

    def __lt__(self, other):
        """Element-wise ``<``. A date string is accepted for a datetime series, as with ``>``."""
        if self.dtype == bool:
            raise TypeError("< not supported for boolean series")

        other = self._parse_if_date_string(other)

        if not self._accepts(other):
            raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")

        return Series([item < other for item in self.data], bool)

    def __eq__(self, other):
        """Element-wise ``==``. A date string is accepted for a datetime series, as with ``>``."""
        other = self._parse_if_date_string(other)

        if not self._accepts(other):
            raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")

        return Series([item == other for item in self.data], bool)
|
||||
|
||||
|
||||
class TimeSeriesCore(UserDict):
    """Defines the core building blocks of a TimeSeries object"""

    def __init__(
        self, data: List[Iterable], frequency: Literal["D", "W", "M", "Q", "H", "Y"], date_format: str = "%Y-%m-%d"
    ):
        """Instantiate a TimeSeries object

        Parameters
        ----------
        data : List[tuple]
            Time Series data in the form of list of tuples.
            The first element of each tuple should be a date and second element should be a value.

        frequency : str
            The frequency of the time series.
            Valid values are {D, W, M, Q, H, Y}

        date_format : str, optional, default "%Y-%m-%d"
            Specify the format of the date
            Required only if the first argument of tuples is a string. Otherwise ignored.
        """

        data = _preprocess_timeseries(data, date_format=date_format)

        self.data = dict(data)
        # dict() keeps a single entry per key, so a shorter dict means some dates repeated
        if len(self.data) != len(data):
            print("Warning: The input data contains duplicate dates which have been ignored.")
        self.frequency = getattr(AllFrequencies, frequency)
        self.iter_num = -1
        self._dates = None
        self._values = None
        self._start_date = None
        self._end_date = None

    def _date_list(self):
        """Return the cached plain list of dates, rebuilt when the number of entries has changed"""

        if self._dates is None or len(self._dates) != len(self.data):
            self._dates = list(self.data.keys())
        return self._dates

    @property
    def dates(self):
        """All dates of the time series wrapped in a Series of datetime"""

        return Series(self._date_list(), datetime.datetime)

    @property
    def values(self):
        """All values of the time series wrapped in a Series of float"""

        if self._values is None or len(self._values) != len(self.data):
            self._values = list(self.data.values())

        return Series(self._values, float)

    @property
    def start_date(self):
        """The first date held by the object"""

        return self.dates[0]

    @property
    def end_date(self):
        """The last date held by the object"""

        return self.dates[-1]

    def _get_printable_slice(self, n: int):
        """Returns a slice of the dataframe from beginning and end"""

        printable = {}
        iter_f = iter(self.data)
        first_n = [next(iter_f) for i in range(n // 2)]

        iter_b = reversed(self.data)
        last_n = [next(iter_b) for i in range(n // 2)]
        # collected back-to-front, so restore ascending order for display
        last_n.sort()

        printable["start"] = [str((i, self.data[i])) for i in first_n]
        printable["end"] = [str((i, self.data[i])) for i in last_n]
        return printable

    def __repr__(self):
        # Long series are abbreviated to the first and last three rows
        if len(self.data) > 6:
            printable = self._get_printable_slice(6)
            printable_str = "{}([{}\n\t ...\n\t {}], frequency={})".format(
                self.__class__.__name__,
                ",\n\t ".join(printable["start"]),
                ",\n\t ".join(printable["end"]),
                repr(self.frequency.symbol),
            )
        else:
            printable_str = "{}([{}], frequency={})".format(
                self.__class__.__name__,
                ",\n\t".join([str(i) for i in self.data.items()]),
                repr(self.frequency.symbol),
            )
        return printable_str

    def __str__(self):
        # Long series are abbreviated to the first and last three rows
        if len(self.data) > 6:
            printable = self._get_printable_slice(6)
            printable_str = "[{}\n ...\n {}]".format(
                ",\n ".join(printable["start"]),
                ",\n ".join(printable["end"]),
            )
        else:
            printable_str = "[{}]".format(",\n ".join([str(i) for i in self.data.items()]))
        return printable_str

    def __getitem__(self, key):
        """Look up items by boolean Series, date, date string, or a sequence of dates

        The strings "dates" and "values" return the corresponding Series.
        Integer keys are rejected; use .iloc for index based access.
        """

        if isinstance(key, Series):
            if not key.dtype == bool:
                raise ValueError(f"Cannot slice {self.__class__.__name__} using a Series of {key.dtype.__name__}")

            # Fetch the date list once instead of rebuilding a Series for every element
            dates = self._date_list()
            if len(key) != len(dates):
                raise Exception(f"Length of Series: {len(key)} did not match length of object: {len(dates)}")

            data_to_return = [(dates[i], self.data[dates[i]]) for i, flag in enumerate(key) if flag]
            return self.__class__(data_to_return, frequency=self.frequency.symbol)

        if isinstance(key, int):
            raise KeyError(f"{key}. For index based slicing, use .iloc[{key}]")
        elif isinstance(key, (datetime.datetime, datetime.date)):
            key = _parse_date(key)
            item = (key, self.data[key])
        elif isinstance(key, str):
            if key == "dates":
                return self.dates
            elif key == "values":
                return self.values

            dt_key = _parse_date(key)
            item = (dt_key, self.data[dt_key])

        elif isinstance(key, Sequence):
            keys = [_parse_date(i) for i in key]
            item = [(k, self.data[k]) for k in keys]
        else:
            raise TypeError(f"Invalid type {repr(type(key).__name__)} for slicing.")
        return item

    def __iter__(self):
        # The object is its own iterator; the position is kept on the instance
        self.n = 0
        return self

    def __next__(self):
        # Index the cached list directly rather than building a Series on every step
        dates = self._date_list()
        if self.n >= len(dates):
            raise StopIteration
        else:
            key = dates[self.n]
            self.n += 1
            return key, self.data[key]

    def head(self, n: int = 6):
        """Returns the first n items of the TimeSeries object"""

        keys = list(self.data.keys())
        keys = keys[:n]
        result = [(key, self.data[key]) for key in keys]
        return result

    def tail(self, n: int = 6):
        """Returns the last n items of the TimeSeries object"""

        keys = list(self.data.keys())
        # keys[-0:] is the whole list, so n == 0 needs an explicit guard
        keys = keys[-n:] if n else []
        result = [(key, self.data[key]) for key in keys]
        return result

    @property
    def iloc(self):
        """Returns an item or a set of items based on index"""

        return _IndexSlicer(self)
|
242
fincal/fincal.py
242
fincal/fincal.py
@ -1,242 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
from typing import List, Union
|
||||
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
from .core import AllFrequencies, TimeSeriesCore, _parse_date, _preprocess_match_options
|
||||
|
||||
|
||||
def create_date_series(
    start_date: datetime.datetime, end_date: datetime.datetime, frequency: str, eomonth: bool = False
) -> List[datetime.datetime]:
    """Build the list of dates from start_date up to end_date, one per frequency period

    Parameters
    ----------
    start_date : datetime.datetime
        First date of the series (before any month-end adjustment).

    end_date : datetime.datetime
        Dates later than this are left out of the result.

    frequency : str
        Name of an attribute of AllFrequencies.

    eomonth : bool, optional
        Move every generated date to the last day of its month.

    Raises
    ------
    ValueError
        If eomonth is True and the frequency has fewer days than the monthly one.
    """

    freq = getattr(AllFrequencies, frequency)
    if eomonth and freq.days < AllFrequencies.M.days:
        raise ValueError(f"eomonth cannot be set to True if frequency is higher than {AllFrequencies.M.name}")

    # Number of candidate dates to try, based on the nominal length of one period
    period_count = int((end_date - start_date).days / freq.days + 1)

    series = []
    for step in range(0, period_count):
        candidate = start_date + relativedelta(**{freq.freq_type: freq.value * step})
        if eomonth:
            if candidate.month != 12:
                # first day of the following month, minus one day
                first_of_next = candidate.replace(day=1).replace(month=candidate.month + 1)
                candidate = first_of_next - relativedelta(days=1)
            else:
                candidate = candidate.replace(day=31)
        if candidate <= end_date:
            series.append(candidate)

    return series
|
||||
|
||||
|
||||
class TimeSeries(TimeSeriesCore):
    """Container for TimeSeries objects"""

    def info(self):
        """Summary info about the TimeSeries object"""

        total_dates = len(self.data.keys())
        res_string = "First date: {}\nLast date: {}\nNumber of rows: {}"
        return res_string.format(self.start_date, self.end_date, total_dates)

    def _fill(self, dates_to_fill, limit):
        """Carry the most recently seen value across dates_to_fill, in the order given

        Dates visited before any observation has been seen are skipped, and at most
        `limit` consecutive missing dates are filled (no cap when limit is None).
        """

        filled = dict()
        cur_val = None
        has_value = False
        gap = 0
        for cur_date in dates_to_fill:
            try:
                cur_val = self.data[cur_date]
                has_value = True
                gap = 0
            except KeyError:
                # nothing to carry yet, or the run of missing dates exceeded the limit
                if not has_value or (limit is not None and gap >= limit):
                    continue
                gap += 1
            filled[cur_date] = cur_val
        return filled

    def _closest_value(self, date, delta, not_found_message):
        """Step `date` by `delta` until a date with a value is found

        Returns the matched date and its value. Raises ValueError when delta is
        falsy (exact match required) or when the search moves away from the range
        of available dates, where no match can exist.
        """

        bounds = None
        while True:
            value = self.data.get(date, None)
            if value is not None:
                return date, value
            if not delta:
                raise ValueError(not_found_message)

            # The bounds are only needed after a miss, so compute them lazily
            if bounds is None:
                if not self.data:
                    raise ValueError(not_found_message)
                bounds = (min(self.data), max(self.data))

            next_date = date + delta
            moving_back = next_date < date
            moving_forward = next_date > date
            if (moving_back and date < bounds[0]) or (moving_forward and date > bounds[1]):
                raise ValueError(not_found_message)
            date = next_date

    def ffill(self, inplace: bool = False, limit: int = None) -> Union[TimeSeries, None]:
        """Forward fill missing dates in the time series

        Parameters
        ----------
        inplace : bool
            Modify the time-series data in place and return None.

        limit : int, optional
            Maximum number of periods to forward fill

        Returns
        -------
            Returns a TimeSeries object if inplace is False, otherwise None
        """

        eomonth = self.frequency.days >= AllFrequencies.M.days
        dates_to_fill = create_date_series(self.start_date, self.end_date, self.frequency.symbol, eomonth)

        new_ts = self._fill(dates_to_fill, limit)

        if inplace:
            self.data = new_ts
            return None

        return self.__class__(new_ts, frequency=self.frequency.symbol)

    def bfill(self, inplace: bool = False, limit: int = None) -> Union[TimeSeries, None]:
        """Backward fill missing dates in the time series

        Parameters
        ----------
        inplace : bool
            Modify the time-series data in place and return None.

        limit : int, optional
            Maximum number of periods to back fill

        Returns
        -------
            Returns a TimeSeries object if inplace is False, otherwise None
        """

        eomonth = self.frequency.days >= AllFrequencies.M.days
        dates_to_fill = create_date_series(self.start_date, self.end_date, self.frequency.symbol, eomonth)
        # The last observation is visited first when walking backwards, so a value is always available
        dates_to_fill.append(self.end_date)

        bfill_ts = self._fill(reversed(dates_to_fill), limit)
        # Filled from the end backwards; flip back into ascending order
        new_ts = {k: bfill_ts[k] for k in reversed(bfill_ts)}
        if inplace:
            self.data = new_ts
            return None

        return self.__class__(new_ts, frequency=self.frequency.symbol)

    def calculate_returns(
        self,
        as_on: Union[str, datetime.datetime],
        as_on_match: str = "closest",
        prior_match: str = "closest",
        closest: str = "previous",
        compounding: bool = True,
        years: int = 1,
        date_format: str = None
    ) -> float:
        """Method to calculate returns for a certain time-period as on a particular date

        Parameters
        ----------
        as_on : datetime.datetime
            The date as on which the return is to be calculated.

        as_on_match : str, optional
            The mode of matching the as_on_date. Refer closest.

        prior_match : str, optional
            The mode of matching the prior_date. Refer closest.

        closest : str, optional
            The mode of matching the closest date.
            Valid values are 'exact', 'previous' and 'next'.

        compounding : bool, optional
            Whether the return should be compounded annually.

        years : int, optional
            number of years for which the returns should be calculated

        date_format : str, optional
            Format passed to the date parser for as_on.

        Returns
        -------
            The float value of the returns.

        Raises
        ------
            ValueError
                * If match mode for any of the dates is exact and the exact match is not found
                * If no matching date exists in the direction being searched
                * If the arguments passed for closest, as_on_match, and prior_match are invalid

        Example
        --------
            >>> calculate_returns(datetime.date(2020, 1, 1), years=1)
        """

        as_on = _parse_date(as_on, date_format)
        as_on_delta, prior_delta = _preprocess_match_options(as_on_match, prior_match, closest)

        as_on, current = self._closest_value(as_on, as_on_delta, "As on date not found")

        # The prior date is measured from the as-on date that was actually matched
        prev_date = as_on - relativedelta(years=years)
        _, previous = self._closest_value(prev_date, prior_delta, "Previous date not found")

        returns = current / previous
        if compounding:
            returns = returns ** (1 / years)
        return returns - 1

    def calculate_rolling_returns(
        self,
        from_date: Union[datetime.date, str],
        to_date: Union[datetime.date, str],
        frequency: str = None,
        as_on_match: str = "closest",
        prior_match: str = "closest",
        closest: str = "previous",
        compounding: bool = True,
        years: int = 1,
        date_format: str = None
    ) -> List[tuple]:
        """Calculates the rolling return

        Calls calculate_returns for every date between from_date and to_date at the
        given frequency (the object's own frequency when omitted) and returns the
        sorted list of (date, return) tuples.
        """

        from_date = _parse_date(from_date, date_format)
        to_date = _parse_date(to_date, date_format)

        if frequency is None:
            frequency = self.frequency
        else:
            try:
                frequency = getattr(AllFrequencies, frequency)
            except AttributeError:
                raise ValueError(f"Invalid argument for frequency {frequency}")

        dates = create_date_series(from_date, to_date, frequency.symbol)
        if frequency == AllFrequencies.D:
            # For daily frequency only dates actually present in the data are used
            dates = [i for i in dates if i in self.data]

        rolling_returns = []
        for i in dates:
            returns = self.calculate_returns(
                as_on=i,
                compounding=compounding,
                years=years,
                as_on_match=as_on_match,
                prior_match=prior_match,
                closest=closest,
            )
            rolling_returns.append((i, returns))
        rolling_returns.sort()
        return rolling_returns
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sample January 2020 dates with a few days left out
    date_series = [datetime.datetime(2020, 1, day) for day in (1, 2, 3, 4, 7, 8, 9, 10, 12)]
|
26
my_checks.py
Normal file
26
my_checks.py
Normal file
@ -0,0 +1,26 @@
|
||||
import datetime
|
||||
import time
|
||||
import timeit
|
||||
|
||||
import pandas
|
||||
|
||||
from pyfacts.pyfacts import AllFrequencies, TimeSeries, create_date_series
|
||||
|
||||
# Sample histories, loaded from the test fixtures
dfd = pandas.read_csv("test_files/msft.csv")
dfm = pandas.read_csv("test_files/nav_history_monthly.csv")
dfq = pandas.read_csv("test_files/nav_history_quarterly.csv")

# The same kind of (date, nav) data in three input shapes: tuples, dicts and a mapping
data_d = [(row.date, row.nav) for row in dfd.itertuples()]
data_m = [dict(date=row.date, value=row.nav) for row in dfm.itertuples()]
data_q = {row.date: row.nav for row in dfq.itertuples()}
data_q["14-02-2022"] = 93.7

tsd = TimeSeries(data_d, frequency="D")
tsm = TimeSeries(data_m, frequency="M", date_format="%d-%m-%Y")
tsq = TimeSeries(data_q, frequency="Q", date_format="%d-%m-%Y")

# Time the operation under test
start = time.time()
# ts.calculate_rolling_returns(datetime.datetime(2015, 1, 1), datetime.datetime(2022, 2, 1), years=1)
bdata = tsq.bfill()
# rr = tsd.calculate_rolling_returns(datetime.datetime(2022, 1, 1), datetime.datetime(2022, 2, 1), years=1)
print(time.time() - start)
|
23
my_test.py
23
my_test.py
@ -1,23 +0,0 @@
|
||||
import time
|
||||
|
||||
import pandas
|
||||
|
||||
from fincal.fincal import AllFrequencies, Frequency, TimeSeries, create_date_series
|
||||
|
||||
# Sample NAV histories, loaded from the test fixtures
dfd = pandas.read_csv("test_files/nav_history_daily - Copy.csv")
dfm = pandas.read_csv("test_files/nav_history_monthly.csv")
dfq = pandas.read_csv("test_files/nav_history_quarterly.csv")

# The same kind of (date, nav) data in three input shapes: tuples, dicts and a mapping
data_d = [(row.date, row.nav) for row in dfd.itertuples()]
data_m = [dict(date=row.date, value=row.nav) for row in dfm.itertuples()]
data_q = {row.date: row.nav for row in dfq.itertuples()}

tsd = TimeSeries(data_d, frequency="D")
tsm = TimeSeries(data_m, frequency="M", date_format="%d-%m-%Y")
tsq = TimeSeries(data_q, frequency="Q", date_format="%d-%m-%Y")

# Time whichever of the commented-out experiments below is enabled
start = time.time()
# ts.calculate_rolling_returns(datetime.datetime(2015, 1, 1), datetime.datetime(2022, 2, 1), years=1)
# fdata = tsd.ffill()
# rr = tsd.calculate_rolling_returns(datetime.datetime(2022, 1, 1), datetime.datetime(2022, 2, 1), years=1)
print(time.time() - start)
|
27
pyfacts/__init__.py
Normal file
27
pyfacts/__init__.py
Normal file
@ -0,0 +1,27 @@
|
||||
from .core import *
|
||||
from .pyfacts import *
|
||||
from .statistics import *
|
||||
from .utils import *
|
||||
|
||||
__author__ = "Gourav Kumar"
|
||||
__email__ = "gouravkr@outlook.in"
|
||||
__version__ = "0.0.1"
|
||||
|
||||
|
||||
__doc__ = """
|
||||
PyFacts stands for Python library for Financial analysis and computations on time series.
|
||||
It is a library which makes it simple to work with time series data.
|
||||
|
||||
Most libraries, and languages like SQL, work with rows. Operations are performed by rows
|
||||
and not by dates. For instance, to calculate 1-year rolling returns in SQL, you are forced
|
||||
to use either a lag of 365/252 rows, leading to an approximation, or slow and cumbersome
|
||||
joins. PyFacts solves this by allowing you to work with dates and time intervals. Hence,
|
||||
to calculate 1-year returns, you will be specifying a lag of 1-year and the library will
|
||||
do the grunt work of finding the most appropriate observations to calculate these returns on.
|
||||
|
||||
PyFacts aims to simplify things by allowing you to:
|
||||
* Compare time-series data based on dates and time-period-based lag
|
||||
* Work around missing dates easily by taking the closest data points
|
||||
* Complete series with missing data points using forward fill and backward fill
|
||||
* Use friendly dates everywhere written as a simple string
|
||||
"""
|
1017
pyfacts/core.py
Normal file
1017
pyfacts/core.py
Normal file
File diff suppressed because it is too large
Load Diff
21
pyfacts/exceptions.py
Normal file
21
pyfacts/exceptions.py
Normal file
@ -0,0 +1,21 @@
|
||||
import datetime
|
||||
from typing import Literal
|
||||
|
||||
|
||||
class DateNotFoundError(Exception):
    """Raised when a requested date cannot be found

    The exception text is the given message followed by the offending date.
    """

    def __init__(self, message, date):
        super().__init__(f"{message}: {date}")
|
||||
|
||||
|
||||
class DateOutOfRangeError(Exception):
    """Exception to be raised when provided date is outside the range of dates in the time series

    Parameters
    ----------
    date : datetime.datetime
        The date that fell outside the available range.

    type : 'min' | 'max'
        Which boundary was violated: "min" when the date is before the first date,
        "max" when it is after the last date. Any other value produces a generic
        out-of-range message.
    """

    def __init__(self, date: datetime.datetime, type: Literal["min", "max"]) -> None:
        if type == "min":
            message = f"Provided date {date} is before the first date in the TimeSeries"
        elif type == "max":
            message = f"Provided date {date} is after the last date in the TimeSeries"
        else:
            # Building the exception must not fail itself and hide the real error
            message = f"Provided date {date} is outside the range of dates in the TimeSeries"
        super().__init__(message)
|
992
pyfacts/pyfacts.py
Normal file
992
pyfacts/pyfacts.py
Normal file
@ -0,0 +1,992 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import datetime
|
||||
import math
|
||||
import pathlib
|
||||
import statistics
|
||||
from typing import Iterable, List, Literal, Mapping, Tuple, TypedDict
|
||||
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
from .core import AllFrequencies, Frequency, Series, TimeSeriesCore, date_parser
|
||||
from .utils import (
|
||||
PyfactsOptions,
|
||||
_find_closest_date,
|
||||
_interval_to_years,
|
||||
_is_eomonth,
|
||||
_preprocess_match_options,
|
||||
)
|
||||
|
||||
|
||||
class MaxDrawdown(TypedDict):
    """Typed dictionary with the keys start_date, end_date and drawdown"""

    # NOTE(review): presumably the dates bounding the drawdown period - confirm against the producer
    start_date: datetime.datetime
    end_date: datetime.datetime
    # NOTE(review): presumably the size of the drawdown over that period - confirm sign and units
    drawdown: float
|
||||
|
||||
|
||||
@date_parser(0, 1)
def create_date_series(
    start_date: str | datetime.datetime,
    end_date: str | datetime.datetime,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"],
    eomonth: bool = False,
    skip_weekends: bool = False,
    ensure_coverage: bool = False,
) -> Series:
    """Create a date series with a specified frequency

    Parameters
    ----------
    start_date : str | datetime.datetime
        Date series will always start at this date

    end_date : str | datetime.datetime
        The date till which the series should extend
        Depending on the other parameters, this date may or may not be present
        in the final date series

    frequency : D | W | M | Q | H | Y
        Frequency of the date series.
        The gap between each successive date will be equivalent to this frequency

    eomonth : bool, optional
        Specifies if the dates in the series should be end of month dates.
        Can only be used if the frequency is Monthly or lower.

    skip_weekends: Boolean, default False
        If set to True, dates falling on weekends will not be added to the series.
        Used only when frequency is daily, weekends will necessarily be included for other frequencies.

    ensure_coverage: Boolean, default False
        If set to True, the series is extended until its last date is on or after the end date.

    Returns
    -------
    Series
        Returns the dates as a Series of dtype "date"

    Raises
    ------
    ValueError
        * If eomonth is True and frequency is higher than monthly
        * If the series would contain more than 100000 dates
    """

    frequency = getattr(AllFrequencies, frequency)
    if eomonth and frequency.days < AllFrequencies.M.days:
        raise ValueError(f"eomonth cannot be set to True if frequency is higher than {AllFrequencies.M.name}")

    dates = []
    counter = 0
    while counter < 100000:
        diff = {frequency.freq_type: frequency.value * counter}
        date = start_date + relativedelta(**diff)

        if eomonth:
            # first day of the next month minus one day, i.e. the last day of this month
            date += relativedelta(months=1, day=1, days=-1)

        if date > end_date:
            if not ensure_coverage:
                break
            # `dates` is still empty when the very first date is already past end_date;
            # in that case fall through so that one covering date gets added
            elif dates and dates[-1] >= end_date:
                break

        counter += 1
        if frequency.days > 1 or not skip_weekends:
            dates.append(date)
        elif date.weekday() < 5:
            dates.append(date)
    else:
        # the loop ran out without hitting a break
        raise ValueError("Cannot generate a series containing more than 100000 dates")

    return Series(dates, dtype="date")
|
||||
|
||||
|
||||
class TimeSeries(TimeSeriesCore):
|
||||
"""1-Dimensional Time Series object
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : List[Iterable] | Mapping
|
||||
Time Series data in the form of list of tuples.
|
||||
The first element of each tuple should be a date and second element should be a value.
|
||||
The following types of objects can be passed to create a TimeSeries object:
|
||||
* List of tuples containing date & value
|
||||
* List of lists containing date & value
|
||||
* List of dictionaries containing key: value pair of date and value
|
||||
* List of dictionaries with 2 keys, first representing date & second representing value
|
||||
* Dictionary of key: value pairs
|
||||
|
||||
frequency : str, optional, default "infer"
|
||||
The frequency of the time series. Default is infer.
|
||||
The class will try to infer the frequency automatically and adjust to the closest member.
|
||||
Note that inferring frequencies can fail if the data is too irregular.
|
||||
Valid values are {D, W, M, Q, H, Y}
|
||||
|
||||
validate_frequency: boolean, default True
|
||||
Whether the provided frequency should be validated against the data.
|
||||
When set to True, if the expected number of data points is not within the expected limits,
|
||||
it will raise an Exception and object creation will fail.
|
||||
This parameter will be ignored if frequency is not provided.
|
||||
refer core._validate_frequency for more details.
|
||||
|
||||
date_format : str, optional, default "%Y-%m-%d"
|
||||
Specify the format of the date
|
||||
Required only if the first argument of tuples is a string. Otherwise ignored.
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    data: List[Iterable] | Mapping,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    validate_frequency: bool = True,
    date_format: str = "%Y-%m-%d",
):
    """Instantiate a TimeSeries object

    All four arguments are forwarded positionally, in the order given,
    to the parent class initializer.
    """

    super().__init__(data, frequency, validate_frequency, date_format)
|
||||
|
||||
def info(self) -> str:
    """Return a three line summary: first date, last date and number of rows"""

    row_count: int = len(self.data)
    return f"First date: {self.start_date}\nLast date: {self.end_date}\nNumber of rows: {row_count}"
|
||||
|
||||
def ffill(
    self, inplace: bool = False, limit: int = 1000, skip_weekends: bool = False, eomonth: bool = None
) -> TimeSeries | None:
    """Forward fill missing dates in the time series

    Parameters
    ----------
    inplace : bool
        Modify the time-series data in place and return None.

    limit : int, optional
        Maximum number of periods to forward fill

    skip_weekends: bool, optional, default false
        Skip weekends while forward filling daily data

    eomonth : bool, optional
        Whether the filled dates should be month-end dates.
        When not provided, it is determined from the existing dates.

    Returns
    -------
        Returns a TimeSeries object if inplace is False, otherwise None
    """
    if eomonth is None:
        eomonth = _is_eomonth(self.dates)

    dates_to_fill = create_date_series(
        self.start_date, self.end_date, self.frequency.symbol, eomonth, skip_weekends=skip_weekends
    )

    new_ts = dict()
    # Last (date, value) item seen; stays None until the first observation is matched
    cur_val = None
    counter = 0
    for cur_date in dates_to_fill:
        try:
            cur_val = self[cur_date]
            counter = 0
        except KeyError:
            # Skip when there is nothing to carry forward yet (e.g. the first generated
            # date is not in the data) or when the fill limit has been reached
            if cur_val is None or counter >= limit:
                continue
            counter += 1
        new_ts[cur_date] = cur_val[1]

    if inplace:
        self.data = new_ts
        return None

    return self.__class__(new_ts, frequency=self.frequency.symbol)
|
||||
|
||||
def bfill(
    self, inplace: bool = False, limit: int = 1000, skip_weekends: bool = False, eomonth: bool = None
) -> TimeSeries | None:
    """Backward fill missing dates in the time series

    Parameters
    ----------
    inplace : bool
        Modify the time-series data in place and return None.

    limit : int, optional
        Maximum number of periods to back fill

    skip_weekends: bool, optional, default false
        Skip weekends while backward filling daily data

    eomonth : bool, optional
        Whether the filled dates should be month-end dates.
        When not provided, it is determined from the existing dates.

    Returns
    -------
        Returns a TimeSeries object if inplace is False, otherwise None
    """
    if eomonth is None:
        eomonth = _is_eomonth(self.dates)

    fill_dates = create_date_series(
        self.start_date, self.end_date, self.frequency.symbol, eomonth, skip_weekends=skip_weekends
    )
    # The walk below starts from the last date of the series
    fill_dates.append(self.end_date)

    filled_backwards = {}
    misses = 0
    for dt in reversed(fill_dates):
        try:
            carried = self[dt]
        except KeyError:
            if misses >= limit:
                continue
            misses += 1
        else:
            misses = 0
        filled_backwards[dt] = carried[1]

    # Entries were added from the end backwards; flip them into ascending order
    new_ts = {dt: filled_backwards[dt] for dt in reversed(filled_backwards)}
    if inplace:
        self.data = new_ts
        return None

    return self.__class__(new_ts, frequency=self.frequency.symbol)
|
||||
|
||||
@date_parser(1)
def calculate_returns(
    self,
    as_on: str | datetime.datetime,
    return_actual_date: bool = True,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next", "exact"] = "previous",
    closest_max_days: int = -1,
    if_not_found: Literal["fail", "nan"] = "fail",
    annual_compounded_returns: bool = True,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    date_format: str = None,
) -> Tuple[datetime.datetime, float]:
    """Calculate the return over a given period as on a particular date

    Parameters
    ----------
    as_on : datetime.datetime
        The date as on which the return is to be calculated.

    return_actual_date : bool, default True
        If True, the output carries the date that was actually matched for as_on.
        Set to False to get back the date passed in the as_on argument.

    as_on_match : str, optional
        The mode of matching the as_on date. Refer closest.

    prior_match : str, optional
        The mode of matching the prior date. Refer closest.

    closest : 'previous' | 'next' | 'exact'
        The mode of matching the closest date.

    closest_max_days: int, default -1
        The maximum acceptable gap between the provided date arguments and actual date.
        Pass -1 for no limit.

    if_not_found : 'fail' | 'nan'
        What to do when the required date is not found; passed on to the date lookup.

    annual_compounded_returns : bool, optional
        Whether the return should be compounded annually.

    return_period_unit : 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value : int
        The value of the specified interval type over which returns needs to be calculated.

    date_format: str
        The date format to use for this operation.
        Not read in the method body; presumably consumed by the date_parser decorator.

    Returns
    -------
        A tuple containing the date and float value of the returns.
        The value is float NaN when a lookup reports "nan" for a date or a value.

    Example
    --------
        >>> ts.calculate_returns(datetime.date(2020, 1, 1), return_period_unit="years", return_period_value=1)
        (datetime.datetime(2020, 1, 1, 0, 0), .0567)
    """

    as_on_delta, prior_delta = _preprocess_match_options(as_on_match, prior_match, closest)

    current = _find_closest_date(self, as_on, closest_max_days, as_on_delta, if_not_found)
    prev_date = as_on - relativedelta(**{return_period_unit: return_period_value})

    # The lookups signal a missing date/value with the string "nan"
    missing = "nan"
    if current[1] == missing:
        return as_on, float("NaN")

    previous = _find_closest_date(self, prev_date, closest_max_days, prior_delta, if_not_found)
    if previous[1] == missing or current[0] == missing or previous[0] == missing:
        return as_on, float("NaN")

    growth = current[1] / previous[1]
    if annual_compounded_returns:
        period_in_years = _interval_to_years(return_period_unit, return_period_value)
        growth = growth ** (1 / period_in_years)

    result_date = current[0] if return_actual_date else as_on
    return result_date, growth - 1
|
||||
|
||||
@date_parser(1, 2)
def calculate_rolling_returns(
    self,
    from_date: datetime.date | str = None,
    to_date: datetime.date | str = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next", "exact"] = "previous",
    if_not_found: Literal["fail", "nan"] = "fail",
    annual_compounded_returns: bool = True,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    date_format: str = None,
) -> TimeSeries:
    """Calculate the returns on a rolling basis.
    This is a wrapper around calculate_returns, called once per date.

    Parameters
    ----------
    from_date : datetime.date | str
        Start date for the return calculation.
        Defaults to the start date of the series plus one return period.

    to_date : datetime.date | str
        End date for the returns calculation.
        Defaults to the end date of the series.

    frequency : str, optional
        Frequency at which the returns should be calculated.
        Valid values are {D, W, M, Q, H, Y}. Defaults to the frequency of the series.

    as_on_match : str, optional
        The match mode to be used for the as on date.

    prior_match : str, optional
        The match mode to be used for the prior date, i.e., the date against which the return will be calculated.

    closest : previous | next | exact
        The default match mode for dates.

    if_not_found : fail | nan
        Specifies what should be done if the date is not found; passed on to calculate_returns.

    annual_compounded_returns : bool, optional
        Should the returns be compounded annually.

    return_period_unit : years | months | days
        The interval for the return calculation.

    return_period_value : int, optional
        The value of the interval for return calculation.

    date_format : str, optional
        A datetime library compatible format string.
        Not read in the method body; presumably consumed by the date_parser decorator.

    Returns
    -------
        Returns the rolling returns as a TimeSeries object.

    Raises
    ------
        ValueError
            - If an invalid argument is passed for frequency parameter.

    See also
    --------
        TimeSeries.calculate_returns
    """

    if frequency is not None:
        try:
            frequency = getattr(AllFrequencies, frequency)
        except AttributeError:
            raise ValueError(f"Invalid argument for frequency {frequency}")
    else:
        frequency = self.frequency

    if from_date is None:
        # Earliest date for which a full return period of history is available
        period_days = math.ceil(_interval_to_years(return_period_unit, return_period_value) * 365)
        from_date = self.start_date + relativedelta(days=period_days)

    if to_date is None:
        to_date = self.end_date

    dates = create_date_series(from_date, to_date, frequency.symbol)
    if frequency == AllFrequencies.D:
        # For daily frequency only dates actually present in the data are used
        dates = [dt for dt in dates if dt in self.data]

    rolling_returns = [
        self.calculate_returns(
            as_on=dt,
            annual_compounded_returns=annual_compounded_returns,
            return_period_unit=return_period_unit,
            return_period_value=return_period_value,
            as_on_match=as_on_match,
            prior_match=prior_match,
            closest=closest,
            if_not_found=if_not_found,
        )
        for dt in dates
    ]
    rolling_returns.sort()
    return self.__class__(rolling_returns, frequency.symbol)
|
||||
|
||||
@date_parser(1, 2)
def volatility(
    self,
    from_date: datetime.date | str = None,
    to_date: datetime.date | str = None,
    annualize_volatility: bool = True,
    traded_days: int = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "days",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next", "exact"] = "previous",
    if_not_found: Literal["fail", "nan"] = "fail",
    annual_compounded_returns: bool = None,
    date_format: str = None,
) -> float:
    """Calculate the volatility of the time series.

    Volatility is the sample standard deviation of the rolling returns
    produced by calculate_rolling_returns() for the requested window.

    Parameters
    ----------
    from_date : datetime.date | str, optional
        First date for which a rolling return is computed.
        Defaults to the series start date plus one return period.

    to_date : datetime.date | str, optional
        Last date for which a rolling return is computed.
        Defaults to the last date in the TimeSeries.

    annualize_volatility : bool, default True
        When True, the standard deviation is scaled by the square root of
        the number of return periods in a year. Scaling is applied only for
        return periods expressed in "months" or "days".

    traded_days : int, optional
        Number of traded days per year, used to annualise daily returns.
        If not provided, PyfactsOptions.traded_days is used.

    frequency : "D", "W", "M", "Q", "H", "Y", optional
        Frequency at which rolling returns are sampled.
        Defaults to the frequency of the TimeSeries.

    return_period_unit : "years", "months", "days"
        Unit of the return period.

    return_period_value : int
        Length of the return period, expressed in return_period_unit.

    annual_compounded_returns : bool, optional
        Passed on to calculate_rolling_returns(). When None, it is enabled
        only if the return period is longer than one year.

    date_format : str, optional
        Not read by the body of this method.
        NOTE(review): presumably consumed by the date_parser decorator - confirm.

    The remaining options (as_on_match, prior_match, closest, if_not_found)
    are passed on unchanged to calculate_rolling_returns().

    Returns
    -------
    The volatility as a float.

    Raises
    ------
    ValueError
        If frequency is not an attribute of AllFrequencies.

    See also
    --------
    TimeSeries.calculate_rolling_returns()
    """

    if frequency is not None:
        try:
            frequency = getattr(AllFrequencies, frequency)
        except AttributeError:
            raise ValueError(f"Invalid argument for frequency {frequency}")
    else:
        frequency = self.frequency

    if from_date is None:
        # The earliest date with a full return period behind it.
        one_period = relativedelta(**{return_period_unit: return_period_value})
        from_date = self.start_date + one_period
    if to_date is None:
        to_date = self.end_date

    period_in_years = _interval_to_years(return_period_unit, return_period_value)
    if annual_compounded_returns is None:
        if period_in_years > 1:
            annual_compounded_returns = True
        else:
            annual_compounded_returns = False

    returns_series = self.calculate_rolling_returns(
        from_date=from_date,
        to_date=to_date,
        frequency=frequency.symbol,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        if_not_found=if_not_found,
        annual_compounded_returns=annual_compounded_returns,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
    )
    deviation = statistics.stdev(returns_series.values)

    if not annualize_volatility:
        return deviation

    if traded_days is None:
        traded_days = PyfactsOptions.traded_days

    # Scale by sqrt(number of return periods per year).
    if return_period_unit == "months":
        deviation *= math.sqrt(12 / return_period_value)
    elif return_period_unit == "days":
        deviation *= math.sqrt(traded_days / return_period_value)

    return deviation
|
||||
|
||||
def average_rolling_return(self, **kwargs) -> float:
    """Calculate the mean of the rolling returns for a given return period.

    Parameters
    ----------
    kwargs:
        Parameters passed on to calculate_rolling_returns().
        return_period_unit defaults to the frequency type of the series,
        return_period_value defaults to 1, from_date defaults to the start
        date plus one return period and to_date defaults to the end date.

        If annual_compounded_returns is truthy (the default), rolling
        returns for periods of a year or more are computed as annually
        compounded; for shorter periods they are computed as simple returns
        and the resulting mean is annualised afterwards.

    Returns
    -------
    float
        The average of the rolling returns, ignoring "nan" entries.

    See also
    --------
    TimeSeries.calculate_rolling_returns()
    """
    kwargs.setdefault("return_period_unit", self.frequency.freq_type)
    kwargs.setdefault("return_period_value", 1)

    period_unit = kwargs["return_period_unit"]
    period_value = kwargs["return_period_value"]
    years = _interval_to_years(period_unit, period_value)

    annualise_returns = False
    if kwargs.get("annual_compounded_returns", True):
        if years >= 1:
            kwargs["annual_compounded_returns"] = True
        else:
            # Sub-annual periods: average the simple returns, annualise the mean later.
            kwargs["annual_compounded_returns"] = False
            annualise_returns = True

    if kwargs.get("from_date") is None:
        kwargs["from_date"] = self.start_date + relativedelta(**{period_unit: period_value})
    kwargs.setdefault("to_date", self.end_date)

    rolling = self.calculate_rolling_returns(**kwargs)
    usable_returns = [value for value in rolling.values if str(value) != "nan"]
    average = statistics.mean(usable_returns)

    if annualise_returns:
        average = (1 + average) ** (1 / years) - 1

    return average
|
||||
|
||||
def max_drawdown(self) -> MaxDrawdown:
    """Find the largest fall from a running peak to a later value.

    The series is walked in order while tracking the highest value seen so
    far. Every value that does not exceed the running peak is measured
    against that peak as ``value / peak - 1``; the most negative such
    figure is reported. When several points share the same drawdown, the
    earliest one is returned.

    Returns
    -------
    MaxDrawdown
        Mapping with start_date (date of the peak), end_date (date of the
        trough) and drawdown (the fall, in decimal).
    """

    peak_value = 0
    peak_date = list(self.data)[0]

    # Worst drawdown seen so far as (peak date, trough date, drawdown).
    worst = None

    for current_date, current_value in self.data.items():
        if current_value > peak_value:
            # New peak: the drawdown at a peak is zero by definition.
            peak_date, peak_value = current_date, current_value
            candidate = (current_date, current_date, 0)
        else:
            candidate = (peak_date, current_date, current_value / peak_value - 1)

        if worst is None or candidate[2] < worst[2]:
            worst = candidate

    peak_on, trough_on, fall = worst
    return dict(start_date=peak_on, end_date=trough_on, drawdown=fall)
|
||||
|
||||
def expand(
    self,
    to_frequency: Literal["D", "W", "M", "Q", "H"],
    method: Literal["ffill", "bfill"],
    skip_weekends: bool = False,
    eomonth: bool = False,
) -> TimeSeries:
    """Expand a time series to a higher frequency.

    A new date series is generated between the start and end dates of this
    series at the target frequency, and every new date is given the value
    of the nearest existing data point in the direction chosen by method.

    Parameters
    ----------
    to_frequency : "D", "W", "M", "Q", "H"
        Frequency to which the TimeSeries will be expanded.
        Must be higher than the current frequency of the TimeSeries.

    method : ffill | bfill
        How new dates are filled. "ffill" looks up the previous available
        value; anything else looks up the next available value.

    skip_weekends : bool, optional
        Passed on to create_date_series().
        Whether weekends should be skipped while expanding to daily.

    eomonth : bool, optional
        Passed on to create_date_series().
        Whether dates should be end of month dates when the frequency is
        monthly or lower.

    Returns
    -------
    TimeSeries
        A new TimeSeries object at the target frequency.

    Raises
    ------
    ValueError
        * If to_frequency cannot be recognised
        * If to_frequency is the same as or lower than the current frequency
    """
    try:
        target_frequency: Frequency = getattr(AllFrequencies, to_frequency)
    except AttributeError:
        raise ValueError(f"Invalid argument for to_frequency {to_frequency}")

    # A higher frequency means fewer days between observations.
    if self.frequency.days <= target_frequency.days:
        raise ValueError("TimeSeries can be only expanded to a higher frequency")

    expanded_dates = create_date_series(
        self.start_date,
        self.end_date,
        frequency=target_frequency.symbol,
        skip_weekends=skip_weekends,
        eomonth=eomonth,
        ensure_coverage=True,
    )

    lookup_direction: str = "next" if method != "ffill" else "previous"

    expanded_data: dict = {}
    for new_date in expanded_dates:
        expanded_data[new_date] = self.get(new_date, closest=lookup_direction)[1]

    return TimeSeries(expanded_data, frequency=target_frequency.symbol)
|
||||
|
||||
def shrink(
    self,
    to_frequency: Literal["W", "M", "Q", "H", "Y"],
    method: Literal["ffill", "bfill"],
    skip_weekends: bool = False,
    eomonth: bool = False,
) -> TimeSeries:
    """Shrink a time series to a lower frequency.

    A new date series is generated between the start and end dates of this
    series at the target frequency, and every new date is given the value
    of the nearest existing data point in the direction chosen by method.
    No aggregation is performed; see transform() for that.

    Parameters
    ----------
    to_frequency : "W", "M", "Q", "H", "Y"
        Frequency to which the TimeSeries will be shrunk.
        Must be lower than the current frequency of the TimeSeries.

    method : ffill | bfill
        How the value for each new date is picked. "ffill" looks up the
        previous available value; anything else looks up the next one.

    skip_weekends : bool, optional
        Passed on to create_date_series().

    eomonth : bool, optional
        Passed on to create_date_series().
        Whether dates should be end of month dates when the frequency is
        monthly or lower.

    Returns
    -------
    TimeSeries
        A new TimeSeries object at the target frequency.

    Raises
    ------
    ValueError
        * If to_frequency cannot be recognised
        * If to_frequency is the same as or higher than the current frequency
    """
    try:
        target_frequency: Frequency = getattr(AllFrequencies, to_frequency)
    except AttributeError:
        raise ValueError(f"Invalid argument for to_frequency {to_frequency}")

    # A lower frequency means more days between observations.
    if self.frequency.days >= target_frequency.days:
        raise ValueError("TimeSeries can be only shrunk to a lower frequency")

    reduced_dates = create_date_series(
        self.start_date,
        self.end_date,
        frequency=target_frequency.symbol,
        skip_weekends=skip_weekends,
        eomonth=eomonth,
        ensure_coverage=True,
    )

    lookup_direction: str = "next" if method != "ffill" else "previous"
    reduced_data: dict = {
        new_date: self.get(new_date, closest=lookup_direction)[1] for new_date in reduced_dates
    }

    return TimeSeries(reduced_data, frequency=target_frequency.symbol)
|
||||
|
||||
def sync(self, other: TimeSeries, fill_method: Literal["ffill", "bfill"] = "ffill") -> TimeSeries:
    """Synchronize another TimeSeries object with this one.

    The other series is first converted to the frequency of this series
    (expanded if it is less frequent, shrunk if it is more frequent).
    A new series is then built which has exactly the dates of this series
    and the values of the other series. Dates that are missing from the
    other series are filled using fill_method.

    Parameters
    ----------
    other : TimeSeries
        Another object of TimeSeries class whose dates need to be synchronized.

    fill_method : ffill | bfill, default ffill
        Method used to fill missing values when synchronizing.
        "ffill" takes the previous available value, anything else takes
        the next available value.

    Returns
    -------
    TimeSeries
        A new object of the same class as this one, holding the values of
        other on the dates of this series.

    Raises
    ------
    TypeError
        If the other object is not of TimeSeries class.
    """

    if not isinstance(other, TimeSeries):
        raise TypeError("Only objects of type TimeSeries can be passed for sync")

    own_days = self.frequency.days
    other_days = other.frequency.days
    if own_days < other_days:
        other = other.expand(to_frequency=self.frequency.symbol, method=fill_method)
    elif own_days > other_days:
        # The target must be this series' frequency: shrinking `other` to its
        # own frequency is rejected by shrink() with a ValueError.
        other = other.shrink(to_frequency=self.frequency.symbol, method=fill_method)

    closest = "previous" if fill_method == "ffill" else "next"
    new_other: dict = {}
    for dt in self.dates:
        if dt in other:
            new_other[dt] = other[dt][1]
        else:
            new_other[dt] = other.get(dt, closest=closest)[1]

    return self.__class__(new_other, frequency=other.frequency.symbol)
|
||||
|
||||
def mean(self) -> float:
    """Return the arithmetic mean of the values held in the time series."""
    observations = self.values
    return statistics.mean(observations)
|
||||
|
||||
def transform(
    self,
    to_frequency: Literal["W", "M", "Q", "H", "Y"],
    method: Literal["sum", "mean"],
    eomonth: bool = False,
    ensure_coverage: bool = True,
    anchor_date: Literal["start", "end"] = "end",
) -> TimeSeries:
    """Transform a time series into a lower frequency series using an aggregation function.

    A date series is generated at the target frequency. For each generated
    date, all data points later than the previous generated date and up to
    and including the current one are aggregated into a single value. The
    first generated date collects every data point up to and including it.

    Parameters
    ----------
    to_frequency:
        Frequency to which the time series needs to be transformed.
        Must be lower than the current frequency.

    method:
        Aggregation method to be used. Can be either mean or sum.

    eomonth:
        Use end of month dates. Only applicable for frequencies monthly and lower.
        Passed on to create_date_series().

    ensure_coverage:
        Passed on to create_date_series().

    anchor_date:
        Accepted for interface compatibility; the current implementation
        does not read it. Each aggregated value is always stored against
        the last date of its bucket.

    Returns
    -------
    Returns a TimeSeries object

    Raises
    ------
    ValueError:
        * If invalid input is passed for frequency
        * If invalid input is passed for method
        * If to_frequency is the same as or higher than the current frequency
    """

    try:
        target_frequency: Frequency = getattr(AllFrequencies, to_frequency)
    except AttributeError:
        raise ValueError(f"Invalid argument for to_frequency {to_frequency}")

    if target_frequency.days <= self.frequency.days:
        raise ValueError("TimeSeries can be only shrunk to a lower frequency")

    if method not in ["sum", "mean"]:
        raise ValueError(f"Method not recognised: {method}")

    dates = create_date_series(
        self.start_date,
        self.end_date,
        target_frequency.symbol,
        ensure_coverage=ensure_coverage,
        eomonth=eomonth,
    )

    new_ts_dict = {}
    for idx, date in enumerate(dates):
        if idx == 0:
            cur_data = self[self.dates <= date]
        else:
            # Bucket is the half-open interval (previous date, current date].
            cur_data = self[(self.dates <= date) & (self.dates > dates[idx - 1])]

        if method == "sum":
            value = sum(cur_data.values)
        else:
            value = cur_data.mean()

        new_ts_dict[date] = value

    return self.__class__(new_ts_dict, target_frequency.symbol)
|
||||
|
||||
|
||||
def _preprocess_csv(
|
||||
file_path: str | pathlib.Path, delimiter: str = ",", encoding: str = "utf-8", **kwargs
|
||||
) -> List[list]:
|
||||
"""Preprocess csv data"""
|
||||
|
||||
if isinstance(file_path, str):
|
||||
file_path = pathlib.Path(file_path)
|
||||
|
||||
if not file_path.exists():
|
||||
raise ValueError("File not found. Check the file path")
|
||||
|
||||
with open(file_path, "r", encoding=encoding) as file:
|
||||
reader: csv.reader = csv.reader(file, delimiter=delimiter, **kwargs)
|
||||
csv_data: list = list(reader)
|
||||
|
||||
csv_data = [i for i in csv_data if i] # remove blank rows
|
||||
if not csv_data:
|
||||
raise ValueError("File is empty")
|
||||
|
||||
return csv_data
|
||||
|
||||
|
||||
def read_csv(
    csv_file_path: str | pathlib.Path,
    frequency: Literal["D", "W", "M", "Q", "Y"],
    date_format: str = None,
    col_names: Tuple[str, str] = None,
    col_index: Tuple[int, int] = (0, 1),
    has_header: bool = True,
    skip_rows: int = 0,
    nrows: int = -1,
    delimiter: str = ",",
    encoding: str = "utf-8",
    **kwargs,
) -> TimeSeries:
    """Reads Time Series data directly from a CSV file

    Parameters
    ----------
    csv_file_path:
        path of the csv file to be read.

    frequency:
        frequency of the time series data.

    date_format:
        date format, specified as datetime compatible string

    col_names:
        specify the column headers to be read, as (date column, value column).
        this parameter will allow you to read two columns from a CSV file which may have more columns.
        this parameter overrides col_index parameter and requires a header row.

    col_index:
        specify the column numbers to be read, as (date column, value column).
        this parameter will allow you to read two columns from a CSV file which may have more columns.
        if neither names nor index is specified, the first two columns from the csv file will be read,
        with the first being treated as date.

    has_header:
        specify whether the file has a header row.
        if true, the header row will be ignored while creating the time series data.

    skip_rows:
        the number of non-blank rows at the top of the file to be skipped.
        if the file has a header, the header is the first row after the skipped rows.

    nrows:
        the number of rows to be read from the csv file, counted from the first row
        which is not skipped. if the file has a header, the header row is included
        in this count. a negative value reads all remaining rows.

    delimiter:
        specify the delimiter used in the csv file.

    encoding:
        specify the encoding of the csv file.

    kwargs:
        other keyword arguments to be passed on the csv.reader()

    Returns
    -------
    Returns a TimeSeries object built from the selected date and value columns.

    Raises
    ------
    ValueError
        * If col_names is passed for a file without a header row
        * If a name in col_names is not present in the header row
    """

    if col_names is not None and not has_header:
        # Without a header row there is nothing to resolve the names against.
        raise ValueError("col_names can only be used when the file has a header row")

    data = _preprocess_csv(csv_file_path, delimiter, encoding, **kwargs)

    read_end_row = skip_rows + nrows if nrows >= 0 else None

    header = None
    if has_header:
        header = data[skip_rows]
        first_data_row = skip_rows + 1
    else:
        first_data_row = skip_rows
    data = data[first_data_row:read_end_row]

    if col_names is not None:
        date_col = header.index(col_names[0])
        value_col = header.index(col_names[1])
    else:
        date_col, value_col = col_index[0], col_index[1]

    ts_data = [(row[date_col], row[value_col]) for row in data if row]

    return TimeSeries(ts_data, frequency=frequency, date_format=date_format)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sample dates in January 2020; the 15th, 16th and 21st are left out.
    date_series = [
        datetime.datetime(2020, 1, day_of_month)
        for day_of_month in (11, 12, 13, 14, 17, 18, 19, 20, 22)
    ]
|
621
pyfacts/statistics.py
Normal file
621
pyfacts/statistics.py
Normal file
@ -0,0 +1,621 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import math
|
||||
import statistics
|
||||
from typing import Literal
|
||||
|
||||
from pyfacts.core import date_parser
|
||||
|
||||
from .pyfacts import TimeSeries, create_date_series
|
||||
from .utils import _interval_to_years, _preprocess_from_to_date, covariance
|
||||
|
||||
# from dateutil.relativedelta import relativedelta
|
||||
|
||||
|
||||
@date_parser(3, 4)
def sharpe_ratio(
    time_series_data: TimeSeries,
    risk_free_data: TimeSeries = None,
    risk_free_rate: float = None,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Calculate the Sharpe ratio of a time series

    The Sharpe ratio measures return per unit of risk,
    with risk taken as the standard deviation of returns:

    (average asset return - risk free rate) / volatility of asset returns

    The average return comes from TimeSeries.average_rolling_return() and
    the volatility from TimeSeries.volatility() (annualized), both called
    with the same date range, frequency and return period.

    Parameters
    ----------
    time_series_data:
        The time series for which Sharpe ratio needs to be calculated

    risk_free_data:
        Risk free rates as time series data.
        This should be the time series of risk free returns,
        and not the underlying asset value. Its mean is used as the risk free rate.

    risk_free_rate:
        Risk free rate to be used.
        Either risk_free_data or risk_free_rate needs to be provided.
        If both are provided, the time series data will be used.

    from_date:
        Start date from which returns should be calculated.
        Defaults to the first date of the series plus one return period.

    to_date:
        End date till which returns should be calculated.
        Defaults to the last date of the series.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The value of the specified interval type over which returns needs to be calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.
        It is forwarded to the rolling return and volatility calculations.

    Returns
    -------
    Value of Sharpe ratio as a float.

    Raises
    ------
    ValueError
        If risk free data or risk free rate is not provided.
    """

    period_in_years = _interval_to_years(return_period_unit, return_period_value)
    interval_days = math.ceil(period_in_years * 365)

    if from_date is None:
        # Earliest date that has a full return period of history behind it.
        from_date = time_series_data.start_date + datetime.timedelta(days=interval_days)
    if to_date is None:
        to_date = time_series_data.end_date

    if risk_free_data is not None:
        risk_free_rate = risk_free_data.mean()
    elif risk_free_rate is None:
        raise ValueError("At least one of risk_free_data or risk_free rate is required")

    shared_kwargs = dict(
        from_date=from_date,
        to_date=to_date,
        frequency=frequency,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        date_format=date_format,
    )

    mean_return = time_series_data.average_rolling_return(**shared_kwargs, annual_compounded_returns=True)
    excess_return = mean_return - risk_free_rate

    risk = time_series_data.volatility(**shared_kwargs, annualize_volatility=True)

    return excess_return / risk
|
||||
|
||||
|
||||
@date_parser(2, 3)
def beta(
    asset_data: TimeSeries,
    market_data: TimeSeries,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Beta is a measure of sensitivity of asset returns to market returns

    The formula for beta is:
    covariance(asset returns, market returns) / variance(market returns)

    Both sets of returns are rolling returns calculated with identical
    parameters. Returns are annually compounded only when the return
    period is longer than one year.

    Parameters
    ----------
    asset_data: TimeSeries
        The time series data of the asset

    market_data: TimeSeries
        The time series data of the relevant market index

    from_date:
        Start date from which returns should be calculated.
        Defaults to the first date of the asset series plus one return period.

    to_date:
        End date till which returns should be calculated.
        Defaults to the last date of the asset series.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The value of the specified interval type over which returns needs to be calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.
        It is forwarded to the rolling return calculations.

    Returns
    -------
    The value of beta as a float.
    """
    interval_years = _interval_to_years(return_period_unit, return_period_value)
    interval_days = math.ceil(interval_years * 365)

    if interval_years > 1:
        annual_compounded_returns = True
    else:
        annual_compounded_returns = False

    if from_date is None:
        from_date = asset_data.start_date + datetime.timedelta(days=interval_days)
    if to_date is None:
        to_date = asset_data.end_date

    rolling_kwargs = dict(
        from_date=from_date,
        to_date=to_date,
        frequency=frequency,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        date_format=date_format,
        annual_compounded_returns=annual_compounded_returns,
    )

    asset_returns = asset_data.calculate_rolling_returns(**rolling_kwargs)
    market_returns = market_data.calculate_rolling_returns(**rolling_kwargs)

    co_movement = covariance(asset_returns.values, market_returns.values)
    market_variance = statistics.variance(market_returns.values)

    return co_movement / market_variance
|
||||
|
||||
|
||||
@date_parser(4, 5)
def jensens_alpha(
    asset_data: TimeSeries,
    market_data: TimeSeries,
    risk_free_data: TimeSeries = None,
    risk_free_rate: float = None,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """
    This function calculates the Jensen's alpha for a time series.
    The formula for Jensen's alpha is:
        Ri - [Rf + B x (Rm - Rf)]
    where:
        Ri = Realized return of the portfolio or investment
        Rf = The risk free rate during the return time frame
        B = Beta of the portfolio or investment
        Rm = Realized return of the market index

    Ri and Rm are point-to-point returns between from_date and to_date,
    annually compounded when that span is longer than 365 days.
    Beta is obtained from beta() using the rolling return parameters.

    Parameters
    ----------
    asset_data: TimeSeries
        The time series data of the asset

    market_data: TimeSeries
        The time series data of the relevant market index

    risk_free_data:
        Risk free rates as time series data.
        This should be the time series of risk free returns,
        and not the underlying asset value. Its mean is used as the risk free rate.

    risk_free_rate:
        Risk free rate to be used.
        Either risk_free_data or risk_free_rate needs to be provided.
        If both are provided, the time series data will be used.

    from_date:
        Start date from which returns should be calculated.
        Defaults to the first date of the asset series plus one return period.

    to_date:
        End date till which returns should be calculated.
        Defaults to the last date of the asset series.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The value of the specified interval type over which returns needs to be calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.
        It is forwarded to the return and beta calculations.

    Returns
    -------
    The value of Jensen's alpha as a float.

    Raises
    ------
    ValueError
        If neither risk free data nor risk free rate is provided.
    """

    # Validate the inputs before doing any of the return calculations.
    if risk_free_data is None and risk_free_rate is None:
        raise ValueError("At least one of risk_free_data or risk_free rate is required")

    interval_years = _interval_to_years(return_period_unit, return_period_value)
    interval_days = math.ceil(interval_years * 365)

    if from_date is None:
        from_date = asset_data.start_date + datetime.timedelta(days=interval_days)
    if to_date is None:
        to_date = asset_data.end_date

    common_params = {
        "from_date": from_date,
        "to_date": to_date,
        "frequency": frequency,
        "return_period_unit": return_period_unit,
        "return_period_value": return_period_value,
        "as_on_match": as_on_match,
        "prior_match": prior_match,
        "closest": closest,
        "date_format": date_format,
    }

    # Realized returns are measured over the whole from_date..to_date span.
    num_days = (to_date - from_date).days
    realized_params = {
        "as_on": to_date,
        "return_period_unit": "days",
        "return_period_value": num_days,
        "annual_compounded_returns": num_days > 365,
        "as_on_match": as_on_match,
        "prior_match": prior_match,
        "closest": closest,
        "date_format": date_format,
    }
    realized_return = asset_data.calculate_returns(**realized_params)
    market_return = market_data.calculate_returns(**realized_params)

    beta_value = beta(asset_data=asset_data, market_data=market_data, **common_params)

    if risk_free_data is not None:
        risk_free_rate = risk_free_data.mean()

    # calculate_returns() yields a pair; index 1 holds the return value.
    expected_return = risk_free_rate + beta_value * (market_return[1] - risk_free_rate)
    return realized_return[1] - expected_return
|
||||
|
||||
|
||||
@date_parser(2, 3)
def correlation(
    data1: TimeSeries,
    data2: TimeSeries,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Calculate the correlation between the rolling returns of two time series

    The correlation is not computed on the raw values of the series.
    Rolling returns are calculated for both series with identical parameters,
    and the correlation of those two return series is reported.
    Hence this function accepts all parameters of the rolling returns calculation.

    Parameters
    ----------
    data1: TimeSeries
        The first time series data

    data2: TimeSeries
        The second time series data

    from_date:
        Start date from which returns should be calculated.
        Defaults to the start date of data1 plus one return period
        (the return period is converted to days, rounded up).

    to_date:
        End date till which returns should be calculated.
        Defaults to the end date of data1.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The value of the specified interval type over which returns needs to be calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.
        Passed through unchanged to the rolling returns calculation.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.
        Passed through unchanged to the rolling returns calculation.

    Returns
    -------
        The correlation of the two rolling return series as a float.

    Raises
    ------
    ValueError:
        * If frequency of both TimeSeries do not match
        * If from_date is before the start of data2 or to_date is after the end of data2
    """

    period_in_years = _interval_to_years(return_period_unit, return_period_value)

    if from_date is None:
        # The first date for which a full return period of history exists in data1.
        from_date = data1.start_date + datetime.timedelta(days=math.ceil(period_in_years * 365))
    if to_date is None:
        to_date = data1.end_date

    if data1.frequency != data2.frequency:
        raise ValueError("Correlation calculation requires both time series to be of same frequency")

    if from_date < data2.start_date or to_date > data2.end_date:
        raise ValueError("Data between from_date and to_date must be present in both time series")

    # Both series must be processed with exactly the same settings,
    # so the keyword arguments are assembled once and reused.
    rolling_kwargs = dict(
        from_date=from_date,
        to_date=to_date,
        frequency=frequency,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        date_format=date_format,
        # Returns are annualised only when the return period is longer than one year.
        annual_compounded_returns=period_in_years > 1,
    )

    first_returns = data1.calculate_rolling_returns(**rolling_kwargs)
    second_returns = data2.calculate_rolling_returns(**rolling_kwargs)

    return statistics.correlation(first_returns.values, second_returns.values)
|
||||
|
||||
|
||||
@date_parser(3, 4)
def sortino_ratio(
    time_series_data: TimeSeries,
    risk_free_data: TimeSeries = None,
    risk_free_rate: float = None,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Calculate the Sortino ratio of any time series

    Sortino ratio is a variation of the Sharpe ratio,
    where risk is measured as standard deviation of negative returns only.
    Since deviation on the positive side is not undesirable, hence sortino ratio excludes positive deviations.

    The formula for Sortino ratio is:
        (average asset return - risk free rate)/volatility of negative asset returns

    Both the average return and the volatility are annualised
    using a 365 day year before the ratio is taken.

    Parameters
    ----------
    time_series_data:
        The time series for which Sortino ratio needs to be calculated

    risk_free_data:
        Risk free rates as time series data.
        This should be the time series of risk free returns,
        and not the underlying asset value.

    risk_free_rate:
        Risk free rate to be used.
        Either risk_free_data or risk_free_rate needs to be provided.
        If both are provided, the time series data will be used.

    from_date:
        Start date from which returns should be calculated.
        If not provided, it is derived from the time series and the return period.

    to_date:
        End date till which returns should be calculated.
        If not provided, it is derived from the time series.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The value of the specified interval type over which returns needs to be calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.
        Passed through unchanged to the rolling returns calculation.

    Returns
    -------
        Value of Sortino ratio as a float.

    Raises
    ------
    ValueError
        If risk free data or risk free rate is not provided.

    statistics.StatisticsError
        If no valid rolling return is available, or if fewer than two
        rolling returns are negative (the standard deviation is then undefined).
    """

    interval_days = math.ceil(_interval_to_years(return_period_unit, return_period_value) * 365)

    from_date, to_date = _preprocess_from_to_date(
        from_date,
        to_date,
        time_series_data,
        False,
        return_period_unit,
        return_period_value,
        as_on_match,
        prior_match,
        closest,
    )

    if risk_free_data is None and risk_free_rate is None:
        raise ValueError("At least one of risk_free_data or risk_free rate is required")
    if risk_free_data is not None:
        # The time series takes precedence over an explicitly passed rate.
        risk_free_rate = risk_free_data.mean()

    # Un-annualised returns are requested; missing dates come back as NaN
    # instead of raising, and are dropped below.
    rolling_returns = time_series_data.calculate_rolling_returns(
        from_date=from_date,
        to_date=to_date,
        frequency=frequency,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        date_format=date_format,
        annual_compounded_returns=False,
        if_not_found="nan",
    )
    valid_returns = [value for value in rolling_returns.values if not math.isnan(value)]

    periods_per_year = 365 / interval_days
    average_rr = statistics.mean(valid_returns)
    annualized_average_rr = (1 + average_rr) ** periods_per_year - 1
    excess_returns = annualized_average_rr - risk_free_rate

    # Only the downside contributes to risk in the Sortino ratio.
    negative_returns = [value for value in valid_returns if value < 0]
    downside_deviation = statistics.stdev(negative_returns) * math.sqrt(periods_per_year)

    return excess_returns / downside_deviation
|
||||
|
||||
|
||||
@date_parser(3, 4)
def moving_average(
    time_series_data: TimeSeries,
    moving_average_period_unit: Literal["years", "months", "days"],
    moving_average_period_value: int,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> TimeSeries:
    """Walk over the moving average windows of a time series

    NOTE(review): this function looks unfinished. Each window is sliced out of
    the time series but the slice is discarded, and nothing is returned even
    though the signature promises a TimeSeries. Confirm the intended behaviour
    before relying on it.

    Parameters
    ----------
    time_series_data:
        The time series over which the windows are taken

    moving_average_period_unit: 'years', 'months', 'days'
        The unit of the window length.
        NOTE(review): the value is used as a keyword argument of datetime.timedelta,
        which accepts 'days' but not 'years' or 'months'.

    moving_average_period_value: int
        The length of the window in the specified unit.

    from_date:
        The first window end date. Derived from the time series if not provided.

    to_date:
        The last window end date. Derived from the time series if not provided.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.

    date_format: str, optional
        Not used inside the function body.
    """

    from_date, to_date = _preprocess_from_to_date(
        from_date,
        to_date,
        time_series_data,
        False,
        return_period_unit=moving_average_period_unit,
        return_period_value=moving_average_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
    )

    window_ends = create_date_series(from_date, to_date, time_series_data.frequency.symbol)

    for window_end in window_ends:
        window_length = datetime.timedelta(**{moving_average_period_unit: moving_average_period_value})
        window_start = window_end - window_length
        # The slice is evaluated but its result is not kept.
        time_series_data[window_start:window_end]
|
265
pyfacts/utils.py
Normal file
265
pyfacts/utils.py
Normal file
@ -0,0 +1,265 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import statistics
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Literal, Mapping, Sequence, Tuple
|
||||
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
from .exceptions import DateNotFoundError, DateOutOfRangeError
|
||||
|
||||
|
||||
@dataclass
class PyfactsOptions:
    """Default settings shared across the package

    The values are read as class attributes (for example PyfactsOptions.date_format),
    so assigning to an attribute on the class changes the default for later calls.
    """

    # strptime compatible pattern used for parsing date strings
    # when no explicit date_format is passed to a function.
    date_format: str = "%Y-%m-%d"

    # Presumably the default direction for matching a missing date
    # ("previous" or "next") - not read anywhere in this module, confirm against callers.
    closest: str = "previous"  # next

    # Presumably the number of days treated as one year - confirm against callers.
    traded_days: int = 365

    # Presumably the default matching mode for date lookups - confirm against callers.
    get_closest: str = "exact"
|
||||
|
||||
|
||||
def _parse_date(date: str | datetime.date, date_format: str = None) -> datetime.datetime:
    """Parses date and handles errors

    Parameters:
    -----------
    date: str | datetime.date
        The date to be parsed.
        If the date passed is already a date or datetime object, it is converted
        to a datetime at midnight of the same day (any time component is dropped).

    date_format: str, default None
        The format of the date string in datetime.strftime friendly format.
        If format is None, format in PyfactsOptions.date_format will be used.

    Returns:
    --------
        Returns a datetime.datetime object.

    Raises:
    -------
        ValueError: If the date is not a date-like string,
            or if the date could not be parsed with the given format
    """

    # datetime.datetime is a subclass of datetime.date, so one check covers both.
    if isinstance(date, datetime.date):
        return datetime.datetime.fromordinal(date.toordinal())

    if date_format is None:
        date_format = PyfactsOptions.date_format

    try:
        return datetime.datetime.strptime(date, date_format)
    except TypeError as err:
        # strptime raises TypeError for non-string input; callers get a ValueError either way.
        raise ValueError("Date does not seem to be valid date-like string") from err
    except ValueError as err:
        raise ValueError(
            "Date could not be parsed. Have you set the correct date format in PyfactsOptions.date_format?"
        ) from err
|
||||
|
||||
|
||||
def _preprocess_timeseries(
    data: Sequence[Tuple[str | datetime.datetime, float]]
    | Sequence[Mapping[str | datetime.datetime, float]]
    | Mapping[str | datetime.datetime, float],
    date_format: str,
) -> List[Tuple[datetime.datetime, float]]:
    """Normalises the supported input shapes into a sorted list of (date, value) tuples.

    The accepted shapes are reduced step by step to a sequence of pairs:

    * A mapping is turned into its (key, value) items.
    * A sequence whose first item is itself a sequence is read as (date, value) pairs.
    * A sequence whose first item is a mapping with one key is read as {date: value} rows.
    * A sequence whose first item is a mapping with two keys is read as rows
      whose first value is the date and whose second value is the value.

    Only the first item is inspected to decide the shape of the whole sequence.
    Dates are parsed with _parse_date and values are converted with float().

    Parameters:
    -----------
    data:
        The data for the time series. Can be a dictionary, a list of tuples, or a list of dictionaries.

    date_format: str
        The format of the date in strftime friendly format.

    Returns:
    -----------
        A list of tuples sorted in ascending order, where the first element of each tuple
        is a datetime.datetime and the second element is a float.

    Raises:
    --------
        TypeError: If the data is not in a format which can be parsed.
    """

    if isinstance(data, Mapping):
        return _preprocess_timeseries(list(data.items()), date_format)

    # Anything which is neither a mapping nor a sequence cannot be parsed
    if not isinstance(data, Sequence):
        raise TypeError("Could not parse the data")

    first_row = data[0]

    if isinstance(first_row, Sequence):
        parsed_rows = [(_parse_date(raw_date, date_format), float(raw_value)) for raw_date, raw_value in data]
        parsed_rows.sort()
        return parsed_rows

    # Rows must be either sequences or mappings
    if not isinstance(first_row, Mapping):
        raise TypeError("Could not parse the data")

    keys_per_row = len(first_row)
    if keys_per_row == 1:
        # {date: value} -> (date, value)
        pairs: List[tuple] = [tuple(*row.items()) for row in data]
    elif keys_per_row == 2:
        # {"any_key": date, "other_key": value} -> (date, value)
        pairs: List[tuple] = [tuple(row.values()) for row in data]
    else:
        raise TypeError("Could not parse the data")

    return _preprocess_timeseries(pairs, date_format)
|
||||
|
||||
|
||||
def _preprocess_match_options(
    as_on_match: str, prior_match: str, closest: str
) -> Tuple[datetime.timedelta, datetime.timedelta]:
    """Checks the arguments and returns appropriate timedelta objects

    Each matching mode is translated into the one day step used while searching
    for a date: "exact" gives no step, "previous" steps one day back and
    "next" steps one day forward. The special value "closest" for as_on_match
    or prior_match means that the mode given in closest is used.

    Parameters
    ----------
    as_on_match: str
        One of "exact", "previous", "next" or "closest".

    prior_match: str
        One of "exact", "previous", "next" or "closest".

    closest: str
        One of "exact", "previous" or "next".

    Returns
    -------
        A tuple of two timedelta objects: the step for the as-on date
        followed by the step for the prior date.

    Raises
    ------
    ValueError
        If any of the three arguments is not a valid matching mode.
    """

    day_steps = {"exact": 0, "previous": -1, "next": 1}

    # closest is validated first since the other two arguments may fall back to it
    if closest not in day_steps:
        raise ValueError(f"Invalid argument for closest: {closest}")

    if as_on_match == "closest":
        as_on_match = closest
    if prior_match == "closest":
        prior_match = closest

    if as_on_match not in day_steps:
        raise ValueError(f"Invalid as_on_match argument: {as_on_match}")
    if prior_match not in day_steps:
        raise ValueError(f"Invalid prior_match argument: {prior_match}")

    as_on_delta = datetime.timedelta(days=day_steps[as_on_match])
    prior_delta = datetime.timedelta(days=day_steps[prior_match])
    return as_on_delta, prior_delta
|
||||
|
||||
|
||||
def _preprocess_from_to_date(
    from_date: datetime.date | str,
    to_date: datetime.date | str,
    time_series: Mapping = None,
    align_dates: bool = True,
    return_period_unit: Literal["years", "months", "days"] = None,
    return_period_value: int = None,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next", "exact"] = "previous",
) -> tuple:
    """Fills in a missing from_date and to_date using the time series

    Parameters
    ----------
    from_date:
        The requested start date. When None, it is set to the date found in the
        time series for (start date of the series + one return period),
        searched in the direction given by as_on_match.

    to_date:
        The requested end date. When None, the end date of the time series is used.

    time_series:
        The time series from which missing dates are derived.
        Required if either date is None.

    align_dates: bool
        Not used inside the function body.

    return_period_unit: 'years', 'months', 'days'
        The unit of the return period. Required whenever time_series is passed.

    return_period_value: int
        The length of the return period. Required whenever time_series is passed.

    as_on_match: str
        The mode of matching the derived from_date. Refer closest.

    prior_match: str
        Only validated here; its value does not affect the result.

    closest: str
        The mode used when as_on_match or prior_match is "closest".

    Returns
    -------
        A tuple of (from_date, to_date).

    Raises
    ------
    ValueError
        * If any matching option is invalid
        * If a date is missing and no time series is passed
        * If a time series is passed without the return period
    """

    as_on_delta, _ = _preprocess_match_options(as_on_match, prior_match, closest)

    any_date_missing = from_date is None or to_date is None
    if any_date_missing and time_series is None:
        raise ValueError("Provide either to_date and from_date or time_series data")

    period_missing = return_period_unit is None or return_period_value is None
    if time_series is not None and period_missing:
        raise ValueError("Provide return period for calculation of from_date")

    if from_date is None:
        # Earliest date which has a full return period of history behind it
        first_usable_date = time_series.start_date + relativedelta(**{return_period_unit: return_period_value})
        matched_row = _find_closest_date(time_series, first_usable_date, 999, as_on_delta, "fail")
        from_date = matched_row[0]

    if to_date is None:
        to_date = time_series.end_date

    return from_date, to_date
|
||||
|
||||
|
||||
def _find_closest_date(
    data: Mapping[datetime.datetime, float],
    date: datetime.datetime,
    limit_days: int,
    delta: datetime.timedelta,
    if_not_found: Literal["fail", "nan"],
) -> Tuple[datetime.datetime, float]:
    """Helper function to find data for the closest available date

    Starting at date, the series is probed repeatedly, moving by delta after
    every miss, until a row is found, the search leaves the range of the data
    or limit_days steps have been taken.

    The search is iterative, so a large limit_days cannot exhaust the
    interpreter's recursion limit, and the boundaries of the data are
    computed once rather than at every step.

    Parameters
    ----------
    data:
        TimeSeries data. It must provide get(date, default) for row lookup
        and a data attribute which iterates over the available dates.

    date:
        The date at which the search starts.

    limit_days: int
        The maximum number of steps of delta to take after the first probe.
        0 probes only the given date.

    delta: timedelta
        The step taken after every miss. A zero delta probes only the given date.

    if_not_found: 'fail', 'nan'
        What to do when no row is found.

    Returns
    -------
        The row stored for the first date which is found.
        With if_not_found="nan": (NaN, NaN) when the search leaves the range of
        the data, or (last probed date, NaN) when the step limit is exhausted.

    Raises
    ------
    DateOutOfRangeError
        If the search leaves the range of the data and if_not_found is not "nan".

    DateNotFoundError
        If the step limit is exhausted and if_not_found is "fail".

    ValueError
        If the step limit is exhausted and if_not_found is neither "fail" nor "nan".
    """

    searching_backward = delta.days < 0
    searching_forward = delta.days > 0

    # A boundary is relevant only in the direction of travel.
    earliest = min(data.data) if searching_backward else None
    latest = max(data.data) if searching_forward else None

    while True:
        if searching_backward and date < earliest:
            if if_not_found == "nan":
                return float("NaN"), float("NaN")
            raise DateOutOfRangeError(date, "min")
        if searching_forward and date > latest:
            if if_not_found == "nan":
                return float("NaN"), float("NaN")
            raise DateOutOfRangeError(date, "max")

        row: tuple = data.get(date, None)
        if row is not None:
            return row

        if not delta or limit_days == 0:
            break
        date = date + delta
        limit_days -= 1

    if if_not_found == "fail":
        raise DateNotFoundError("Data not found for date", date)
    if if_not_found == "nan":
        return date, float("NaN")

    raise ValueError(f"Invalid argument for if_not_found: {if_not_found}")
|
||||
|
||||
|
||||
def _interval_to_years(interval_type: Literal["years", "months", "days"], interval_value: int) -> float:
    """Converts any time period to years for use with compounding functions

    A year is taken as 12 months or 365 days.

    Parameters
    ----------
    interval_type: 'years', 'months', 'days'
        The unit in which the period is expressed.

    interval_value: int
        The length of the period in the given unit.

    Returns
    -------
        The length of the period in years.

    Raises
    ------
    KeyError
        If interval_type is not one of the supported units.
    """

    units_per_year: dict = {"years": 1, "months": 12, "days": 365}
    return interval_value / units_per_year[interval_type]
|
||||
|
||||
|
||||
def _is_eomonth(dates: Sequence[datetime.datetime], threshold: float = 0.7) -> bool:
    """Checks if a series should be treated as end of month date series or not.

    A date counts as an end of month date when the following day falls in a different month.
    If the proportion of such dates exceeds threshold, the series is treated as eomonth series.
    This can be used for any frequency, but will work only for monthly and lower frequencies.

    Parameters
    ----------
    dates:
        The dates of the series.

    threshold: float
        The proportion of end of month dates which must be exceeded.

    Returns
    -------
        True if the proportion of end of month dates is greater than threshold.
        An empty series is never treated as an eomonth series.
    """

    total = len(dates)
    if total == 0:
        # Avoids a division by zero; no dates means no end of month dates either
        return False

    one_day = datetime.timedelta(days=1)
    month_end_count = sum(1 for date in dates if (date + one_day).month != date.month)
    return month_end_count / total > threshold
|
||||
|
||||
|
||||
def covariance(series1: list, series2: list) -> float:
    """Returns the sample covariance of two series

    This is a compatibility function for Python versions prior to 3.10.
    It will be replaced with statistics.covariance when support is dropped for versions <3.10.

    To give the same result as statistics.covariance, the sum of the products of
    deviations is divided by (n - 1) and not by n.

    Parameters
    ----------
    series1 : List
        A list of numbers
    series2 : list
        A list of numbers

    Returns
    -------
    float
        Returns the covariance as a float value

    Raises
    ------
    ValueError
        * If both series are not of the same length
        * If the series have fewer than two data points
    """

    n = len(series1)
    if len(series2) != n:
        raise ValueError("Lenght of both series must be same for covariance calcualtion.")
    if n < 2:
        raise ValueError("At least two data poitns are required for covariance calculation.")

    mean1 = statistics.mean(series1)
    mean2 = statistics.mean(series2)

    sum_of_products = sum((x - mean1) * (y - mean2) for x, y in zip(series1, series2))

    # Sample covariance (Bessel's correction), matching statistics.covariance
    return sum_of_products / (n - 1)
|
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
12
setup.py
12
setup.py
@ -2,21 +2,17 @@ from setuptools import find_packages, setup
|
||||
|
||||
license = open("LICENSE").read().strip()
|
||||
|
||||
|
||||
setup(
|
||||
name="Fincal",
|
||||
version='0.0.1',
|
||||
name="pyfacts",
|
||||
version="0.0.1",
|
||||
license=license,
|
||||
author="Gourav Kumar",
|
||||
author_email="gouravkr@outlook.in",
|
||||
url="https://gouravkumar.com",
|
||||
description="A library which makes handling time series data easier",
|
||||
description="A Python library to perform financial analytics on Time Series data",
|
||||
long_description=open("README.md").read().strip(),
|
||||
packages=find_packages(),
|
||||
install_requires=["python-dateutil"],
|
||||
test_suite="tests",
|
||||
entry_points={
|
||||
"console_scripts": [
|
||||
"fincal=fincal.__main__:main",
|
||||
]
|
||||
},
|
||||
)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,139 +0,0 @@
|
||||
amfi_code,date,nav
|
||||
118825,01-11-2021,87.925
|
||||
119528,02-11-2021,378.51
|
||||
118825,02-11-2021,87.885
|
||||
119528,03-11-2021,377.79
|
||||
118825,03-11-2021,87.553
|
||||
119528,08-11-2021,383.13
|
||||
118825,08-11-2021,88.743
|
||||
119528,09-11-2021,383.06
|
||||
118825,09-11-2021,88.793
|
||||
119528,10-11-2021,382.71
|
||||
118825,10-11-2021,88.723
|
||||
118825,10-11-2021,88.78
|
||||
119528,11-11-2021,379.28
|
||||
118825,11-11-2021,88.205
|
||||
119528,12-11-2021,383.94
|
||||
118825,12-11-2021,89.025
|
||||
119528,15-11-2021,383.31
|
||||
118825,15-11-2021,89.182
|
||||
119528,16-11-2021,381.08
|
||||
118825,16-11-2021,88.569
|
||||
119528,17-11-2021,379.17
|
||||
118825,17-11-2021,88.09
|
||||
119528,18-11-2021,375.09
|
||||
118825,18-11-2021,87.202
|
||||
119528,22-11-2021,368.16
|
||||
118825,22-11-2021,85.382
|
||||
119528,23-11-2021,370.64
|
||||
118825,23-11-2021,85.978
|
||||
119528,24-11-2021,369.91
|
||||
118825,24-11-2021,85.635
|
||||
119528,25-11-2021,371.33
|
||||
118825,25-11-2021,86.212
|
||||
119528,26-11-2021,360.66
|
||||
118825,26-11-2021,83.748
|
||||
119528,29-11-2021,360.05
|
||||
118825,29-11-2021,83.523
|
||||
119528,30-11-2021,359.8
|
||||
118825,30-11-2021,83.475
|
||||
119528,01-12-2021,362.35
|
||||
118825,01-12-2021,84.269
|
||||
119528,02-12-2021,366.09
|
||||
118825,02-12-2021,85.105
|
||||
119528,03-12-2021,363.11
|
||||
118825,03-12-2021,84.507
|
||||
119528,06-12-2021,357.21
|
||||
118825,06-12-2021,83.113
|
||||
119528,07-12-2021,362.63
|
||||
118825,07-12-2021,84.429
|
||||
119528,08-12-2021,368.73
|
||||
118825,08-12-2021,85.935
|
||||
119528,09-12-2021,369.49
|
||||
118825,09-12-2021,86.045
|
||||
119528,10-12-2021,369.44
|
||||
118825,10-12-2021,86.058
|
||||
119528,13-12-2021,367.6
|
||||
118825,13-12-2021,85.632
|
||||
119528,14-12-2021,366.36
|
||||
118825,14-12-2021,85.502
|
||||
119528,15-12-2021,364.34
|
||||
118825,15-12-2021,84.989
|
||||
119528,16-12-2021,363.73
|
||||
118825,16-12-2021,84.972
|
||||
119528,17-12-2021,358.17
|
||||
118825,17-12-2021,83.83
|
||||
119528,20-12-2021,349.98
|
||||
118825,20-12-2021,81.817
|
||||
119528,21-12-2021,353.71
|
||||
118825,21-12-2021,82.746
|
||||
119528,22-12-2021,357.93
|
||||
118825,22-12-2021,83.776
|
||||
119528,23-12-2021,360.68
|
||||
118825,23-12-2021,84.297
|
||||
119528,24-12-2021,359.11
|
||||
118825,24-12-2021,83.903
|
||||
119528,27-12-2021,360.71
|
||||
118825,27-12-2021,84.227
|
||||
119528,28-12-2021,363.81
|
||||
118825,28-12-2021,85.044
|
||||
119528,29-12-2021,363.2
|
||||
118825,29-12-2021,85.03
|
||||
119528,30-12-2021,363.31
|
||||
118825,30-12-2021,85.047
|
||||
119528,31-12-2021,366.98
|
||||
118825,31-12-2021,85.759
|
||||
119528,03-01-2022,371.76
|
||||
118825,03-01-2022,87.111
|
||||
119528,04-01-2022,374.22
|
||||
118825,04-01-2022,87.804
|
||||
119528,05-01-2022,376.31
|
||||
118825,05-01-2022,88.162
|
||||
119528,06-01-2022,373.64
|
||||
118825,06-01-2022,87.541
|
||||
119528,07-01-2022,374.68
|
||||
118825,07-01-2022,87.818
|
||||
119528,10-01-2022,378.47
|
||||
118825,10-01-2022,88.622
|
||||
119528,11-01-2022,379.34
|
||||
118825,11-01-2022,88.678
|
||||
119528,12-01-2022,382.86
|
||||
118825,12-01-2022,89.332
|
||||
119528,13-01-2022,383.68
|
||||
118825,13-01-2022,89.553
|
||||
119528,14-01-2022,384.02
|
||||
118825,14-01-2022,89.729
|
||||
119528,17-01-2022,384.36
|
||||
118825,17-01-2022,89.733
|
||||
119528,18-01-2022,380
|
||||
118825,18-01-2022,88.781
|
||||
119528,19-01-2022,377.24
|
||||
118825,19-01-2022,88.059
|
||||
119528,20-01-2022,374.45
|
||||
118825,20-01-2022,87.361
|
||||
119528,21-01-2022,369.86
|
||||
118825,21-01-2022,86.22
|
||||
119528,24-01-2022,361.01
|
||||
118825,24-01-2022,83.907
|
||||
119528,25-01-2022,364.63
|
||||
118825,25-01-2022,84.763
|
||||
119528,27-01-2022,361.95
|
||||
118825,27-01-2022,83.876
|
||||
119528,28-01-2022,361.91
|
||||
118825,28-01-2022,83.829
|
||||
119528,31-01-2022,367.31
|
||||
118825,31-01-2022,85.18
|
||||
119528,04-02-2022,371.01
|
||||
118825,04-02-2022,86.079
|
||||
119528,07-02-2022,365.04
|
||||
118825,07-02-2022,84.867
|
||||
119528,08-02-2022,365.74
|
||||
118825,08-02-2022,84.945
|
||||
119528,09-02-2022,369.85
|
||||
118825,09-02-2022,85.977
|
||||
119528,10-02-2022,372.29
|
||||
118825,10-02-2022,86.5
|
||||
119528,11-02-2022,366.91
|
||||
118825,11-02-2022,85.226
|
||||
119528,14-02-2022,355.47
|
||||
118825,14-02-2022,82.533
|
|
@ -1,219 +0,0 @@
|
||||
"amfi_code","date","nav"
|
||||
118825,2013-01-31,18.913
|
||||
118825,2013-02-28,17.723
|
||||
118825,2013-03-28,17.563
|
||||
118825,2013-04-30,18.272
|
||||
118825,2013-05-31,18.383
|
||||
118825,2013-06-28,17.802
|
||||
118825,2013-07-31,17.588
|
||||
118825,2013-08-30,16.993
|
||||
118825,2013-09-30,17.732
|
||||
118825,2013-10-31,19.665
|
||||
118825,2013-11-29,19.787
|
||||
118825,2013-12-31,20.499
|
||||
118825,2014-01-31,19.994
|
||||
118825,2014-02-28,20.942
|
||||
118825,2014-03-31,22.339
|
||||
118825,2014-04-30,22.599
|
||||
118825,2014-05-30,24.937
|
||||
118825,2014-06-30,27.011
|
||||
118825,2014-07-31,27.219
|
||||
118825,2014-08-28,28.625
|
||||
118825,2014-09-30,29.493
|
||||
118825,2014-10-31,30.685
|
||||
118825,2014-11-28,31.956
|
||||
118825,2014-12-31,31.646
|
||||
118825,2015-01-30,33.653
|
||||
118825,2015-02-27,33.581
|
||||
118825,2015-03-31,33.14
|
||||
118825,2015-04-30,32.181
|
||||
118825,2015-05-29,33.256
|
||||
118825,2015-06-30,33.227
|
||||
118825,2015-07-31,34.697
|
||||
118825,2015-08-31,32.833
|
||||
118825,2015-09-30,32.94
|
||||
118825,2015-10-30,33.071
|
||||
118825,2015-11-30,33.024
|
||||
118825,2015-12-31,33.267
|
||||
118825,2016-01-29,31.389
|
||||
118825,2016-02-29,28.751
|
||||
118825,2016-03-31,32.034
|
||||
118825,2016-04-29,32.848
|
||||
118825,2016-05-31,34.135
|
||||
118825,2016-06-30,35.006
|
||||
118825,2016-07-29,37.148
|
||||
118825,2016-08-31,38.005
|
||||
118825,2016-09-30,37.724
|
||||
118825,2016-10-28,38.722
|
||||
118825,2016-11-30,36.689
|
||||
118825,2016-12-30,36.239
|
||||
118825,2017-01-31,38.195
|
||||
118825,2017-02-28,39.873
|
||||
118825,2017-03-31,41.421
|
||||
118825,2017-04-28,42.525
|
||||
118825,2017-05-31,43.977
|
||||
118825,2017-06-30,43.979
|
||||
118825,2017-07-31,46.554
|
||||
118825,2017-08-31,46.383
|
||||
118825,2017-09-29,46.085
|
||||
118825,2017-10-31,48.668
|
||||
118825,2017-11-30,48.824
|
||||
118825,2017-12-29,50.579
|
||||
118825,2018-01-31,51.799
|
||||
118825,2018-02-28,49.041
|
||||
118825,2018-03-28,46.858
|
||||
118825,2018-04-30,49.636
|
||||
118825,2018-05-31,49.169
|
||||
118825,2018-06-29,48.716
|
||||
118825,2018-07-31,51.455
|
||||
118825,2018-08-31,53.494
|
||||
118825,2018-09-28,49.863
|
||||
118825,2018-10-31,48.538
|
||||
118825,2018-11-30,50.597
|
||||
118825,2018-12-31,50.691
|
||||
118825,2019-01-31,50.517
|
||||
118825,2019-02-28,50.176
|
||||
118825,2019-03-31,54.017
|
||||
118825,2019-04-30,54.402
|
||||
118825,2019-05-31,55.334
|
||||
118825,2019-06-28,55.181
|
||||
118825,2019-07-31,52.388
|
||||
118825,2019-08-30,52.214
|
||||
118825,2019-09-30,54.058
|
||||
118825,2019-10-31,56.514
|
||||
118825,2019-11-29,57.42
|
||||
118825,2019-12-31,57.771
|
||||
118825,2020-01-31,57.135
|
||||
118825,2020-02-28,54.034
|
||||
118825,2020-03-31,41.452
|
||||
118825,2020-04-30,47.326
|
||||
118825,2020-05-29,45.845
|
||||
118825,2020-06-30,49.526
|
||||
118825,2020-07-31,53.306000000000004
|
||||
118825,2020-08-19,55.747
|
||||
118825,2020-10-30,56.387
|
||||
118825,2020-11-27,62.001000000000005
|
||||
118825,2020-12-31,66.415
|
||||
118825,2021-01-29,65.655
|
||||
118825,2021-02-26,70.317
|
||||
118825,2021-03-31,70.69
|
||||
118825,2021-04-30,70.39
|
||||
118825,2021-05-31,74.85
|
||||
118825,2021-06-30,77.109
|
||||
118825,2021-07-30,78.335
|
||||
118825,2021-08-31,83.691
|
||||
118825,2021-09-30,86.128
|
||||
118825,2021-10-29,86.612
|
||||
118825,2021-11-30,83.475
|
||||
118825,2021-12-31,85.759
|
||||
118825,2022-01-31,85.18
|
||||
118825,2022-02-17,84.33
|
||||
119528,2013-01-31,101.36
|
||||
119528,2013-02-28,95.25
|
||||
119528,2013-03-28,94.81
|
||||
119528,2013-04-30,99.75
|
||||
119528,2013-05-31,99.73
|
||||
119528,2013-06-28,97.52
|
||||
119528,2013-07-31,95.37
|
||||
119528,2013-08-30,92.24
|
||||
119528,2013-09-30,97.45
|
||||
119528,2013-10-31,107.03
|
||||
119528,2013-11-29,105.91
|
||||
119528,2013-12-31,109.3
|
||||
119528,2014-01-31,105.09
|
||||
119528,2014-02-28,108.58
|
||||
119528,2014-03-31,117.28
|
||||
119528,2014-04-30,118.06
|
||||
119528,2014-05-30,131.33
|
||||
119528,2014-06-30,139.48
|
||||
119528,2014-07-31,140.49
|
||||
119528,2014-08-28,145.43
|
||||
119528,2014-09-30,147.4
|
||||
119528,2014-10-31,154.46
|
||||
119528,2014-11-28,161.93
|
||||
119528,2014-12-31,159.62
|
||||
119528,2015-01-30,170.46
|
||||
119528,2015-02-27,171.18
|
||||
119528,2015-03-31,166.8
|
||||
119528,2015-04-30,161.95
|
||||
119528,2015-05-29,166.78
|
||||
119528,2015-06-30,166.67
|
||||
119528,2015-07-31,172.33
|
||||
119528,2015-08-31,161.96
|
||||
119528,2015-09-30,162.25
|
||||
119528,2015-10-30,164.16
|
||||
119528,2015-11-30,162.7
|
||||
119528,2015-12-31,162.83
|
||||
119528,2016-01-29,155.87
|
||||
119528,2016-02-29,144.56
|
||||
119528,2016-03-31,159.88
|
||||
119528,2016-04-29,163.54
|
||||
119528,2016-05-31,170.01
|
||||
119528,2016-06-30,174.61
|
||||
119528,2016-07-29,184.36
|
||||
119528,2016-08-31,189.33
|
||||
119528,2016-09-30,187.16
|
||||
119528,2016-10-28,189.29
|
||||
119528,2016-11-30,178.19
|
||||
119528,2016-12-30,176.66
|
||||
119528,2017-01-31,185.76
|
||||
119528,2017-02-28,193.2
|
||||
119528,2017-03-31,200.54
|
||||
119528,2017-04-28,205.25
|
||||
119528,2017-05-31,208.22
|
||||
119528,2017-06-30,209.83
|
||||
119528,2017-07-31,221.15
|
||||
119528,2017-08-31,219.99
|
||||
119528,2017-09-29,217.7
|
||||
119528,2017-10-31,226.94
|
||||
119528,2017-11-30,225.24
|
||||
119528,2017-12-29,233.26
|
||||
119528,2018-01-31,237.57
|
||||
119528,2018-02-28,226.55
|
||||
119528,2018-03-28,219.73
|
||||
119528,2018-04-30,232.04
|
||||
119528,2018-05-31,228.49
|
||||
119528,2018-06-29,225.27
|
||||
119528,2018-07-31,237.11
|
||||
119528,2018-08-31,243.79
|
||||
119528,2018-09-28,223.83
|
||||
119528,2018-10-31,218.61
|
||||
119528,2018-11-30,226.99
|
||||
119528,2018-12-31,228.61
|
||||
119528,2019-01-31,224.26
|
||||
119528,2019-02-28,222.71
|
||||
119528,2019-03-29,240.21
|
||||
119528,2019-04-30,240.01
|
||||
119528,2019-05-31,243.72
|
||||
119528,2019-06-28,241.28
|
||||
119528,2019-07-31,229.54
|
||||
119528,2019-08-30,226.0
|
||||
119528,2019-09-30,234.75
|
||||
119528,2019-10-31,242.11
|
||||
119528,2019-11-29,246.75
|
||||
119528,2019-12-31,247.81
|
||||
119528,2020-01-31,246.14
|
||||
119528,2020-02-28,231.91
|
||||
119528,2020-03-31,175.98
|
||||
119528,2020-04-30,200.77
|
||||
119528,2020-05-29,196.75
|
||||
119528,2020-06-30,210.55
|
||||
119528,2020-07-31,224.93
|
||||
119528,2020-08-19,233.78
|
||||
119528,2020-10-30,235.83
|
||||
119528,2020-11-27,264.04
|
||||
119528,2020-12-31,285.02
|
||||
119528,2021-01-29,280.52
|
||||
119528,2021-02-26,300.56
|
||||
119528,2021-03-31,301.57
|
||||
119528,2021-04-30,301.1
|
||||
119528,2021-05-31,320.98
|
||||
119528,2021-06-30,327.64
|
||||
119528,2021-07-30,336.6
|
||||
119528,2021-08-31,360.75
|
||||
119528,2021-09-30,369.42
|
||||
119528,2021-10-29,372.89
|
||||
119528,2021-11-30,359.8
|
||||
119528,2021-12-31,366.98
|
||||
119528,2022-01-31,367.31
|
||||
119528,2022-02-17,363.56
|
|
@ -1,11 +0,0 @@
|
||||
amfi_code,date,nav
|
||||
118825,31-03-2021,70.69
|
||||
118825,30-04-2021,70.39
|
||||
118825,31-05-2021,74.85
|
||||
118825,30-07-2021,78.335
|
||||
118825,31-08-2021,83.691
|
||||
118825,30-09-2021,86.128
|
||||
118825,29-10-2021,86.612
|
||||
118825,30-11-2021,83.475
|
||||
118825,31-01-2022,85.18
|
||||
118825,17-02-2022,84.33
|
|
@ -1,71 +0,0 @@
|
||||
"amfi_code","date","nav"
|
||||
118825,2013-03-28,17.563
|
||||
118825,2013-06-28,17.802
|
||||
118825,2013-09-30,17.732
|
||||
118825,2013-12-31,20.499
|
||||
118825,2014-03-31,22.339
|
||||
118825,2014-06-30,27.011
|
||||
118825,2014-09-30,29.493
|
||||
118825,2014-12-31,31.646
|
||||
118825,2015-03-31,33.14
|
||||
118825,2015-06-30,33.227
|
||||
118825,2015-09-30,32.94
|
||||
118825,2015-12-31,33.267
|
||||
118825,2016-03-31,32.034
|
||||
118825,2016-06-30,35.006
|
||||
118825,2016-09-30,37.724
|
||||
118825,2016-12-30,36.239
|
||||
118825,2017-03-31,41.421
|
||||
118825,2017-06-30,43.979
|
||||
118825,2017-09-29,46.085
|
||||
118825,2017-12-29,50.579
|
||||
118825,2018-03-28,46.858
|
||||
118825,2018-06-29,48.716
|
||||
118825,2018-09-28,49.863
|
||||
118825,2018-12-31,50.691
|
||||
118825,2019-03-31,54.017
|
||||
118825,2019-06-28,55.181
|
||||
118825,2019-09-30,54.058
|
||||
118825,2019-12-31,57.771
|
||||
118825,2020-03-31,41.452
|
||||
118825,2020-06-30,49.526
|
||||
118825,2020-12-31,66.415
|
||||
118825,2021-03-31,70.69
|
||||
118825,2021-06-30,77.109
|
||||
118825,2021-09-30,86.128
|
||||
118825,2021-12-31,85.759
|
||||
119528,2013-03-28,94.81
|
||||
119528,2013-06-28,97.52
|
||||
119528,2013-09-30,97.45
|
||||
119528,2013-12-31,109.3
|
||||
119528,2014-03-31,117.28
|
||||
119528,2014-06-30,139.48
|
||||
119528,2014-09-30,147.4
|
||||
119528,2014-12-31,159.62
|
||||
119528,2015-03-31,166.8
|
||||
119528,2015-06-30,166.67
|
||||
119528,2015-09-30,162.25
|
||||
119528,2015-12-31,162.83
|
||||
119528,2016-03-31,159.88
|
||||
119528,2016-06-30,174.61
|
||||
119528,2016-09-30,187.16
|
||||
119528,2016-12-30,176.66
|
||||
119528,2017-03-31,200.54
|
||||
119528,2017-06-30,209.83
|
||||
119528,2017-09-29,217.7
|
||||
119528,2017-12-29,233.26
|
||||
119528,2018-03-28,219.73
|
||||
119528,2018-06-29,225.27
|
||||
119528,2018-09-28,223.83
|
||||
119528,2018-12-31,228.61
|
||||
119528,2019-03-29,240.21
|
||||
119528,2019-06-28,241.28
|
||||
119528,2019-09-30,234.75
|
||||
119528,2019-12-31,247.81
|
||||
119528,2020-03-31,175.98
|
||||
119528,2020-06-30,210.55
|
||||
119528,2020-12-31,285.02
|
||||
119528,2021-03-31,301.57
|
||||
119528,2021-06-30,327.64
|
||||
119528,2021-09-30,369.42
|
||||
119528,2021-12-31,366.98
|
|
@ -1,9 +0,0 @@
|
||||
amfi_code,date,nav
|
||||
118825,31-03-2019,54.017
|
||||
118825,28-06-2019,55.181
|
||||
118825,31-12-2019,57.771
|
||||
118825,31-03-2020,41.452
|
||||
118825,30-06-2020,49.526
|
||||
118825,30-06-2021,77.109
|
||||
118825,30-09-2021,86.128
|
||||
118825,31-12-2021,85.759
|
|
192
testing.ipynb
192
testing.ipynb
@ -1,192 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "3f7938c0-98e3-43b8-86e8-4f000cda7ce5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import datetime\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"from fincal.fincal import TimeSeries\n",
|
||||
"from fincal.core import Series"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "4b8ccd5f-dfff-4202-82c4-f66a30c122b6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dfd = pd.read_csv('test_files/nav_history_daily - copy.csv')\n",
|
||||
"\n",
|
||||
"dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "c52b0c2c-dd01-48dd-9ffa-3147ec9571ef",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Warning: The input data contains duplicate dates which have been ignored.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"TimeSeries([(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n",
|
||||
"\t (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n",
|
||||
"\t (datetime.datetime(2013, 1, 4, 0, 0), 19.008)\n",
|
||||
"\t ...\n",
|
||||
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
|
||||
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
|
||||
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n",
|
||||
"\n",
|
||||
"ts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9e8ff6c6-3a36-435a-ba87-5b9844c18779",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(datetime.datetime(2022, 1, 31, 0, 0), 85.18),\n",
|
||||
" (datetime.datetime(2021, 5, 31, 0, 0), 74.85)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ts[['2022-01-31', '2021-05-31']]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "4d927a61-0f90-4b47-89b7-0e0d3ab1b442",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"s = ts.dates > '2020-01-01'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "f90074f8-5173-49a9-a7d6-ceac01e92431",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"TimeSeries([(datetime.datetime(2020, 1, 2, 0, 0), 58.285),\n",
|
||||
"\t (datetime.datetime(2020, 1, 3, 0, 0), 58.056999999999995),\n",
|
||||
"\t (datetime.datetime(2020, 1, 6, 0, 0), 56.938)\n",
|
||||
"\t ...\n",
|
||||
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
|
||||
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
|
||||
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ts[s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: total: 15.6 ms\n",
|
||||
"Wall time: 13 ms\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"from_date = datetime.date(2020, 1, 1)\n",
|
||||
"to_date = datetime.date(2021, 1, 1)\n",
|
||||
"# print(ts.calculate_returns(to_date, years=7))\n",
|
||||
"rr = ts.calculate_rolling_returns(from_date, to_date)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "086d4377-d1b1-4e51-84c0-39dee28ef75e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"list"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"type(rr)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
42
tests/README.md
Normal file
42
tests/README.md
Normal file
@ -0,0 +1,42 @@
|
||||
# Testing Guidelines
|
||||
PyFacts uses Pytest for unit testing.
|
||||
|
||||
All high level functions are expected to have tests written for them. Each file in the pyfacts module has a dedicated test file. All tests related to that file go within the respective test files.
|
||||
|
||||
Since this module needs data for its tests, a Pytest fixture has been defined to generate it. Use this fixture whenever a test needs data. The fixture uses the random module to generate the data. A seed has been hardcoded for the random data generator to ensure it generates the same data every time (if it didn't, tests for specific values would never pass).
|
||||
WARNING! Do not change the seed for the random data generator. This will cause most tests to fail.
|
||||
|
||||
To use the fixture, just pass `create_test_data` as an argument to the test function and then use it within the function. Pytest will automatically locate the relevant function (it need not be imported into the test file).
|
||||
|
||||
## Writing tests
|
||||
Tests are organised as follows:
|
||||
- Each broad function/method has a Test Class
|
||||
- All variations should be tested within this class using one or more functions
|
||||
|
||||
All test files should be named `test_<module_file_name>.py`.
|
||||
For instance, test file for `core.py` is named `test_core.py`
|
||||
|
||||
All class names should begin with the word `Test`.
|
||||
All function names should begin with the prefix `test_`.
|
||||
|
||||
Ensure that all test functions are independent of each other.
|
||||
## Running tests
|
||||
Skip this part if you already know how to run pytest.
|
||||
|
||||
Open the terminal. Make sure you are in the root pyfacts folder. Then run the following command:
|
||||
`pytest tests`
|
||||
|
||||
This will run the entire test suite. This can take some time depending on the number of tests and speed of your computer. Hence you might want to run only a few tests.
|
||||
|
||||
To run tests within a particular file, say test_core.py, type the following command:
|
||||
`pytest tests/test_core.py`
|
||||
|
||||
If you want to run only a particular class within a file, for instance `TestSetitem` within the `test_core.py` file, run it as follows:
|
||||
`pytest tests/test_core.py::TestSetitem`
|
||||
|
||||
This will run only the specified class, making sure your tests don't take too long.
|
||||
|
||||
If you're using VS Code, you can make this whole process easier by configuring pytest within VS Code. It will identify all tests and allow you to run them individually from the testing pane on the left.
|
||||
|
||||
### Before you push your code
|
||||
Before you push your code or raise a PR, ensure that all tests are passing. PRs where any of the tests are failing will not be merged. Any modifications to the code which require a modification to existing tests should be accompanied by a note in the PR explaining why the existing tests had to be modified.
|
111
tests/conftest.py
Normal file
111
tests/conftest.py
Normal file
@ -0,0 +1,111 @@
|
||||
import datetime
|
||||
import math
|
||||
import random
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
import pyfacts as pft
|
||||
|
||||
|
||||
def conf_add(n1, n2):
    """Return the result of ``n1 + n2``."""
    total = n1 + n2
    return total
|
||||
|
||||
|
||||
@pytest.fixture
def conf_fun():
    """Fixture that hands the ``conf_add`` helper to a test."""
    helper = conf_add
    return helper
|
||||
|
||||
|
||||
def create_prices(s0: float, mu: float, sigma: float, num_prices: int) -> list:
    """Generates a price path following a geometric brownian motion (GBM) process.

    Since this function is used only to generate data for tests, the seed is fixed as 1234.
    Many of the tests rely on exact values generated using this seed.
    If the seed is changed, those tests will fail.

    Note that the seed is set on the module-level ``random`` generator on every call,
    so the state of ``random`` seen by the caller afterwards is reproducible too.

    Parameters:
    ------------
    s0: float
        Initial asset price.

    mu: float
        Interest rate (drift) expressed in annual terms.

    sigma: float
        Volatility expressed in annual terms.

    num_prices: int
        Number of prices to generate.

    Returns:
    --------
        Returns a list of ``num_prices`` values generated using the GBM algorithm.
        Each value is rounded to 2 decimal places and each step spans 1/365 of a year.
    """

    random.seed(1234)  # WARNING! Changing the seed will cause most tests to fail

    # Both terms are the same for every step, so they are computed once up front.
    # The grouping of the arithmetic is unchanged, hence the generated values are too.
    dt = 1.0 / 365.0
    drift = (mu - 0.5 * sigma**2) * dt
    diffusion = sigma * math.sqrt(dt)

    all_values = []
    for _ in range(num_prices):
        s0 *= math.exp(drift + diffusion * random.gauss(mu=0, sigma=1))
        all_values.append(round(s0, 2))

    return all_values
|
||||
|
||||
|
||||
def sample_data_generator(
    frequency: pft.Frequency,
    start_date: datetime.date = datetime.date(2017, 1, 1),
    num: int = 1000,
    skip_weekends: bool = False,
    mu: float = 0.1,
    sigma: float = 0.05,
    eomonth: bool = False,
    dates_as_string: bool = False,
) -> List[tuple]:
    """Creates sample (date, value) data that can be fed to a TimeSeries

    Parameters:
    -----------
    frequency: Frequency
        The frequency of the time series data to be generated.

    start_date: datetime.date
        The date from which the generated date series starts.

    num: int
        Number of date: value pairs to be generated.

    skip_weekends: bool
        Whether weekends (saturday, sunday) should be skipped.
        Gets used only if the frequency is daily.

    mu: float
        Mean return for the values.

    sigma: float
        standard deviation of the values.

    eomonth: bool
        Passed through to ``pft.create_date_series``.

    dates_as_string: bool
        If True, the dates are returned as strings in the ``%Y-%m-%d`` format.

    Returns:
    --------
        Returns a list of (date, value) tuples.
        Dates and values are paired using ``zip``, so the list has at most ``num`` items.
    """

    # With weekends skipped on daily data, only 5 out of every 7 calendar days are kept,
    # so the calendar span is stretched by 7/5 to leave room for ``num`` dates.
    stretch = 7 / 5 if frequency == pft.AllFrequencies.D and skip_weekends else 1
    timedelta_dict = {frequency.freq_type: int(frequency.value * num * stretch)}
    end_date = start_date + relativedelta(**timedelta_dict)

    dates = pft.create_date_series(
        start_date, end_date, frequency.symbol, skip_weekends=skip_weekends, eomonth=eomonth, ensure_coverage=False
    )
    if dates_as_string:
        dates = [dt.strftime("%Y-%m-%d") for dt in dates]

    values = create_prices(1000, mu, sigma, num)
    return list(zip(dates, values))
|
||||
|
||||
|
||||
@pytest.fixture
def create_test_data():
    """Fixture that hands the ``sample_data_generator`` function to a test."""
    generator = sample_data_generator
    return generator
|
7560
tests/data/msft.csv
Normal file
7560
tests/data/msft.csv
Normal file
File diff suppressed because it is too large
Load Diff
435
tests/test_core.py
Normal file
435
tests/test_core.py
Normal file
@ -0,0 +1,435 @@
|
||||
import datetime
|
||||
import random
|
||||
from typing import Mapping
|
||||
|
||||
import pyfacts as pft
|
||||
import pytest
|
||||
from pyfacts.utils import PyfactsOptions
|
||||
|
||||
|
||||
class TestFrequency:
    """Checks the attributes of a directly constructed ``Frequency``."""

    def test_creation(self):
        daily = pft.Frequency("daily", "days", 1, 1, "D")
        expected_attributes = {
            "days": 1,
            "symbol": "D",
            "name": "daily",
            "value": 1,
            "freq_type": "days",
        }
        for attribute, expected in expected_attributes.items():
            assert getattr(daily, attribute) == expected
|
||||
|
||||
|
||||
class TestAllFrequencies:
    """Checks the predefined frequencies exposed through ``AllFrequencies``."""

    def test_attributes(self):
        for symbol in ("D", "M", "Q"):
            assert hasattr(pft.AllFrequencies, symbol)

    def test_days(self):
        frequencies = pft.AllFrequencies
        assert frequencies.D.days == 1
        assert frequencies.M.days == 30
        assert frequencies.Q.days == 91

    def test_symbol(self):
        frequencies = pft.AllFrequencies
        assert frequencies.H.symbol == "H"
        assert frequencies.W.symbol == "W"

    def test_values(self):
        frequencies = pft.AllFrequencies
        assert frequencies.H.value == 6
        assert frequencies.Y.value == 1

    def test_type(self):
        frequencies = pft.AllFrequencies
        assert frequencies.Q.freq_type == "months"
        assert frequencies.W.freq_type == "days"
|
||||
|
||||
|
||||
class TestSeries:
    """Checks the dtype reported by ``Series`` for numbers and dates."""

    def test_creation(self):
        numbers = pft.Series([1, 2, 3, 4, 5, 6, 7], dtype="number")
        assert numbers.dtype == float
        assert numbers[2] == 3

        january = pft.create_date_series("2021-01-01", "2021-01-31", frequency="D")
        date_series = pft.Series(january, dtype="date")
        assert date_series.dtype == datetime.datetime
|
||||
|
||||
|
||||
class TestTimeSeriesCore:
    """Construction, frequency detection and rendering of ``TimeSeriesCore``."""

    data = [("2021-01-01", 220), ("2021-02-01", 230), ("2021-03-01", 240)]

    def test_repr_str(self, create_test_data):
        monthly = pft.TimeSeriesCore(self.data, frequency="M")
        assert str(monthly) in repr(monthly).replace("\t", " ")

        long_data = create_test_data(frequency=pft.AllFrequencies.D, eomonth=False, num=50, dates_as_string=True)
        daily = pft.TimeSeriesCore(long_data, frequency="D")
        assert "..." in str(daily)
        assert "..." in repr(daily)

    def test_creation(self):
        monthly = pft.TimeSeriesCore(self.data, frequency="M")
        assert isinstance(monthly, pft.TimeSeriesCore)
        assert isinstance(monthly, Mapping)

    def test_creation_no_freq(self, create_test_data):
        # The frequency is not passed in, so it has to be detected from the data.
        for expected in (pft.AllFrequencies.D, pft.AllFrequencies.M):
            generated = create_test_data(num=300, frequency=expected)
            assert pft.TimeSeriesCore(generated).frequency == expected

    def test_creation_no_freq_missing_data(self, create_test_data):
        # NOTE(review): the random.sample draws below are presumably reproducible only because
        # the data fixture re-seeds ``random`` on each call -- confirm in conftest.py.
        # The data is therefore regenerated before every sample, in the original order.
        daily = create_test_data(num=300, frequency=pft.AllFrequencies.D)
        daily_subset = random.sample(daily, 182)
        detected = pft.TimeSeriesCore(daily_subset)
        assert detected.frequency == pft.AllFrequencies.D

        daily = create_test_data(num=300, frequency=pft.AllFrequencies.D)
        daily_subset = random.sample(daily, 175)
        with pytest.raises(ValueError):
            pft.TimeSeriesCore(daily_subset)

        weekly = create_test_data(num=100, frequency=pft.AllFrequencies.W)
        weekly_subset = random.sample(weekly, 70)
        detected = pft.TimeSeriesCore(weekly_subset)
        assert detected.frequency == pft.AllFrequencies.W

        weekly = create_test_data(num=100, frequency=pft.AllFrequencies.W)
        weekly_subset = random.sample(weekly, 68)
        with pytest.raises(ValueError):
            pft.TimeSeriesCore(weekly_subset)

    def test_creation_wrong_freq(self, create_test_data):
        weekly = create_test_data(num=100, frequency=pft.AllFrequencies.W)
        with pytest.raises(ValueError):
            pft.TimeSeriesCore(weekly, frequency="D")

        daily = create_test_data(num=100, frequency=pft.AllFrequencies.D)
        with pytest.raises(ValueError):
            pft.TimeSeriesCore(daily, frequency="W")
|
||||
|
||||
|
||||
class TestSlicing:
    """Key, mask and positional access on a small monthly ``TimeSeriesCore``."""

    data = [("2021-01-01", 220), ("2021-02-01", 230), ("2021-03-01", 240)]

    def test_getitem(self):
        ts = pft.TimeSeriesCore(self.data, frequency="M")
        assert ts.dates[0] == datetime.datetime(2021, 1, 1, 0, 0)
        assert ts.values[0] == 220
        assert ts["2021-01-01"][1] == 220
        assert len(ts[ts.dates > "2021-01-01"]) == 2
        assert ts[ts.dates == "2021-02-01"].iloc[0][1] == 230
        assert ts.iloc[2][0] == datetime.datetime(2021, 3, 1)
        assert len(ts.iloc[:2]) == 2
        with pytest.raises(KeyError):
            ts["2021-02-03"]

        # Indexing with a list of dates gives back a series, not a list of items.
        subset_ts = ts[["2021-01-01", "2021-03-01"]]
        assert len(subset_ts) == 2
        assert isinstance(subset_ts, pft.TimeSeriesCore)
        assert subset_ts.iloc[1][1] == 240

    def test_get(self, monkeypatch):
        ts = pft.TimeSeriesCore(self.data, frequency="M")
        assert ts.dates[0] == datetime.datetime(2021, 1, 1, 0, 0)
        assert ts.values[0] == 220
        assert ts.get("2021-01-01")[1] == 220
        assert ts.get("2021-02-15") is None
        assert ts.get("2021-02-23", -1) == -1
        assert ts.get("2021-02-10", closest="previous")[1] == 230
        assert ts.get("2021-02-10", closest="next")[1] == 240

        # PyfactsOptions is shared global state. monkeypatch undoes the change when the
        # test ends, so the option does not leak into tests that run afterwards.
        monkeypatch.setattr(PyfactsOptions, "get_closest", "previous", raising=False)
        assert ts.get("2021-02-10")[1] == 230
        monkeypatch.setattr(PyfactsOptions, "get_closest", "next", raising=False)
        assert ts.get("2021-02-10")[1] == 240

    def test_contains(self):
        ts = pft.TimeSeriesCore(self.data, frequency="M")
        assert datetime.datetime(2021, 1, 1) in ts
        assert "2021-01-01" in ts
        assert "2021-01-14" not in ts

    def test_items(self):
        ts = pft.TimeSeriesCore(self.data, frequency="M")
        # next() fails loudly on an empty items() instead of letting the test pass vacuously.
        _, first_value = next(iter(ts.items()))
        assert first_value == self.data[0][1]

    def test_special_keys(self):
        ts = pft.TimeSeriesCore(self.data, frequency="M")
        dates = ts["dates"]
        values = ts["values"]
        assert isinstance(dates, pft.Series)
        assert isinstance(values, pft.Series)
        assert len(dates) == 3
        assert len(values) == 3
        assert dates[0] == datetime.datetime(2021, 1, 1, 0, 0)
        assert values[0] == 220

    def test_iloc_slicing(self):
        ts = pft.TimeSeriesCore(self.data, frequency="M")
        assert ts.iloc[0] == (datetime.datetime(2021, 1, 1), 220)
        assert ts.iloc[-1] == (datetime.datetime(2021, 3, 1), 240)

        ts_slice = ts.iloc[0:2]
        assert isinstance(ts_slice, pft.TimeSeriesCore)
        assert len(ts_slice) == 2
|
||||
|
||||
|
||||
class TestComparativeSlicing:
    """Filtering rolling returns by a value comparison keeps type and frequency."""

    def test_date_gt_daily(self, create_test_data):
        raw = create_test_data(num=300, frequency=pft.AllFrequencies.D)
        series = pft.TimeSeries(raw, "D")
        rolling = series.calculate_rolling_returns(return_period_unit="months")
        assert len(rolling) == 269

        selector = rolling.values < 0.1
        filtered = rolling[selector]
        assert isinstance(filtered, pft.TimeSeriesCore)
        assert filtered.frequency == pft.AllFrequencies.D

    def test_date_gt_monthly(self, create_test_data):
        raw = create_test_data(num=60, frequency=pft.AllFrequencies.M)
        series = pft.TimeSeries(raw, "M")
        rolling = series.calculate_rolling_returns(return_period_unit="months")
        assert len(rolling) == 59

        selector = rolling.values < 0.1
        filtered = rolling[selector]
        assert isinstance(filtered, pft.TimeSeriesCore)
        assert filtered.frequency == pft.AllFrequencies.M
|
||||
|
||||
|
||||
class TestSetitem:
    """Item assignment on ``TimeSeriesCore``: insert, overwrite and rejected input."""

    data = [("2021-01-01", 220), ("2021-01-04", 230), ("2021-03-07", 240)]

    def test_setitem(self):
        series = pft.TimeSeriesCore(self.data, frequency="M")
        assert len(series) == 3

        new_key = "2021-01-02"
        # The first assignment adds the date; the second overwrites it, so the length stays at 4.
        for new_value in (225, 227.6):
            series[new_key] = new_value
            assert len(series) == 4
            assert series[new_key][1] == new_value

    def test_errors(self):
        series = pft.TimeSeriesCore(self.data, frequency="M")

        with pytest.raises(TypeError):
            series["2021-01-03"] = "abc"

        with pytest.raises(NotImplementedError):
            series.iloc[4] = 4

        with pytest.raises(ValueError):
            series["abc"] = 12
|
||||
|
||||
|
||||
class TestTimeSeriesCoreHeadTail:
    """``head`` and ``tail`` on twelve monthly points (220, 230, ... 330)."""

    # First day of every month of 2021; values rise by 10 per month starting at 220.
    data = [(f"2021-{month:02d}-01", 210 + 10 * month) for month in range(1, 13)]

    def test_head(self):
        series = pft.TimeSeriesCore(self.data, frequency="M")
        default_head = series.head()
        assert len(default_head) == 6
        assert len(series.head(3)) == 3
        assert isinstance(series.head(), pft.TimeSeriesCore)
        first_six = series.head(6)
        assert first_six.iloc[-1][1] == 270

    def test_tail(self):
        series = pft.TimeSeriesCore(self.data, frequency="M")
        default_tail = series.tail()
        assert len(default_tail) == 6
        assert len(series.tail(8)) == 8
        assert isinstance(series.tail(), pft.TimeSeriesCore)
        last_six = series.tail(6)
        assert last_six.iloc[0][1] == 280

    def test_head_tail(self):
        series = pft.TimeSeriesCore(self.data, frequency="M")
        first_eight = series.head(8)
        window = first_eight.tail(2)
        assert isinstance(window, pft.TimeSeriesCore)
        assert "2021-07-01" in window
        assert window.iloc[1][1] == 290
|
||||
|
||||
|
||||
class TestDelitem:
    """Deleting a date from ``TimeSeriesCore``."""

    data = [
        ("2021-01-01", 220),
        ("2021-02-01", 230),
        ("2021-03-01", 240),
        ("2021-04-01", 250),
    ]

    def test_deletion(self):
        series = pft.TimeSeriesCore(self.data, "M")
        removed_key = "2021-03-01"
        assert len(series) == 4

        del series[removed_key]
        assert len(series) == 3
        assert removed_key not in series

        # The key is gone now, so deleting it again has to fail.
        with pytest.raises(KeyError):
            del series[removed_key]
|
||||
|
||||
|
||||
class TestTimeSeriesComparisons:
    """Comparison operators between ``TimeSeriesCore`` and numbers, Series and other series."""

    data1 = [
        ("2021-01-01", 220),
        ("2021-02-01", 230),
        ("2021-03-01", 240),
        ("2021-04-01", 250),
    ]

    data2 = [
        ("2021-01-01", 240),
        ("2021-02-01", 210),
        ("2021-03-01", 240),
        ("2021-04-01", 270),
    ]

    def test_number_comparison(self):
        ts1 = pft.TimeSeriesCore(self.data1, "M")
        assert isinstance(ts1 > 23, pft.TimeSeriesCore)

        # (comparison result, expected per-date flags)
        cases = [
            (ts1 > 230, [0.0, 0.0, 1.0, 1.0]),
            (ts1 >= 230, [0.0, 1.0, 1.0, 1.0]),
            (ts1 < 240, [1.0, 1.0, 0.0, 0.0]),
            (ts1 <= 240, [1.0, 1.0, 1.0, 0.0]),
            (ts1 == 240, [0.0, 0.0, 1.0, 0.0]),
            (ts1 != 240, [1.0, 1.0, 0.0, 1.0]),
        ]
        for outcome, flags in cases:
            assert outcome.values == pft.Series(flags, "float")

    def test_series_comparison(self):
        ts1 = pft.TimeSeriesCore(self.data1, "M")
        ser = pft.Series([240, 210, 240, 270], dtype="int")

        cases = [
            (ts1 > ser, [0.0, 1.0, 0.0, 0.0]),
            (ts1 >= ser, [0.0, 1.0, 1.0, 0.0]),
            (ts1 < ser, [1.0, 0.0, 0.0, 1.0]),
            (ts1 <= ser, [1.0, 0.0, 1.0, 1.0]),
            (ts1 == ser, [0.0, 0.0, 1.0, 0.0]),
            (ts1 != ser, [1.0, 1.0, 0.0, 1.0]),
        ]
        for outcome, flags in cases:
            assert outcome.values == pft.Series(flags, "float")

    def test_tsc_comparison(self):
        ts1 = pft.TimeSeriesCore(self.data1, "M")
        ts2 = pft.TimeSeriesCore(self.data2, "M")

        cases = [
            (ts1 > ts2, [0.0, 1.0, 0.0, 0.0]),
            (ts1 >= ts2, [0.0, 1.0, 1.0, 0.0]),
            (ts1 < ts2, [1.0, 0.0, 0.0, 1.0]),
            (ts1 <= ts2, [1.0, 0.0, 1.0, 1.0]),
            (ts1 == ts2, [0.0, 0.0, 1.0, 0.0]),
            (ts1 != ts2, [1.0, 1.0, 0.0, 1.0]),
        ]
        for outcome, flags in cases:
            assert outcome.values == pft.Series(flags, "float")

    def test_errors(self):
        ts1 = pft.TimeSeriesCore(self.data1, "M")
        ts2 = pft.TimeSeriesCore(self.data2, "M")
        short_ser = pft.Series([240, 210, 240], dtype="int")
        date_ser = pft.Series(["2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01"], dtype="date")

        # ts2 is left one date shorter than ts1.
        del ts2["2021-04-01"]

        with pytest.raises(TypeError):
            ts1 == "a"

        with pytest.raises(ValueError):
            ts1 > ts2

        with pytest.raises(TypeError):
            ts1 == date_ser

        with pytest.raises(ValueError):
            ts1 <= short_ser

        with pytest.raises(TypeError):
            ts2 < [23, 24, 25, 26]
|
||||
|
||||
|
||||
class TestTimeSeriesArithmatic:
|
||||
data = [
|
||||
("2021-01-01", 220),
|
||||
("2021-02-01", 230),
|
||||
("2021-03-01", 240),
|
||||
("2021-04-01", 250),
|
||||
]
|
||||
|
||||
def test_add(self):
|
||||
ts = pft.TimeSeriesCore(self.data, "M")
|
||||
ser = ts.values
|
||||
|
||||
num_add_ts = ts + 40
|
||||
assert num_add_ts["2021-01-01"][1] == 260
|
||||
assert num_add_ts["2021-04-01"][1] == 290
|
||||
|
||||
num_radd_ts = 40 + ts
|
||||
assert num_radd_ts["2021-01-01"][1] == 260
|
||||
assert num_radd_ts["2021-04-01"][1] == 290
|
||||
|
||||
ser_add_ts = ts + ser
|
||||
assert ser_add_ts["2021-01-01"][1] == 440
|
||||
assert ser_add_ts["2021-04-01"][1] == 500
|
||||
|
||||
ts_add_ts = ts + num_add_ts
|
||||
assert ts_add_ts["2021-01-01"][1] == 480
|
||||
assert ts_add_ts["2021-04-01"][1] == 540
|
||||
|
||||
def test_sub(self):
|
||||
ts = pft.TimeSeriesCore(self.data, "M")
|
||||
ser = pft.Series([20, 30, 40, 50], "number")
|
||||
|
||||
num_sub_ts = ts - 40
|
||||
assert num_sub_ts["2021-01-01"][1] == 180
|
||||
assert num_sub_ts["2021-04-01"][1] == 210
|
||||
|
||||
num_rsub_ts = 240 - ts
|
||||
assert num_rsub_ts["2021-01-01"][1] == 20
|
||||
assert num_rsub_ts["2021-04-01"][1] == -10
|
||||
|
||||
ser_sub_ts = ts - ser
|
||||
assert ser_sub_ts["2021-01-01"][1] == 200
|
||||
assert ser_sub_ts["2021-04-01"][1] == 200
|
||||
|
||||
ts_sub_ts = ts - num_sub_ts
|
||||
assert ts_sub_ts["2021-01-01"][1] == 40
|
||||
assert ts_sub_ts["2021-04-01"][1] == 40
|
||||
|
||||
def test_truediv(self):
|
||||
ts = pft.TimeSeriesCore(self.data, "M")
|
||||
ser = pft.Series([22, 23, 24, 25], "number")
|
||||
|
||||
num_div_ts = ts / 10
|
||||
assert num_div_ts["2021-01-01"][1] == 22
|
||||
assert num_div_ts["2021-04-01"][1] == 25
|
||||
|
||||
num_rdiv_ts = 1000 / ts
|
||||
assert num_rdiv_ts["2021-04-01"][1] == 4
|
||||
|
||||
ser_div_ts = ts / ser
|
||||
assert ser_div_ts["2021-01-01"][1] == 10
|
||||
assert ser_div_ts["2021-04-01"][1] == 10
|
||||
|
||||
ts_div_ts = ts / num_div_ts
|
||||
assert ts_div_ts["2021-01-01"][1] == 10
|
||||
assert ts_div_ts["2021-04-01"][1] == 10
|
||||
|
||||
def test_floordiv(self):
|
||||
ts = pft.TimeSeriesCore(self.data, "M")
|
||||
ser = pft.Series([22, 23, 24, 25], "number")
|
||||
|
||||
num_div_ts = ts // 11
|
||||
assert num_div_ts["2021-02-01"][1] == 20
|
||||
assert num_div_ts["2021-04-01"][1] == 22
|
||||
|
||||
num_rdiv_ts = 1000 // ts
|
||||
assert num_rdiv_ts["2021-01-01"][1] == 4
|
||||
|
||||
ser_div_ts = ts // ser
|
||||
assert ser_div_ts["2021-01-01"][1] == 10
|
||||
assert ser_div_ts["2021-04-01"][1] == 10
|
@ -1,151 +0,0 @@
|
||||
import datetime
|
||||
import os
|
||||
import random
|
||||
from typing import Literal, Sequence
|
||||
|
||||
import pytest
|
||||
from fincal.core import Frequency
|
||||
from fincal.fincal import TimeSeries, create_date_series
|
||||
|
||||
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
sample_data_path = os.path.join(THIS_DIR, "data")
|
||||
|
||||
|
||||
def create_test_data(
    frequency: str,
    eomonth: bool,
    n: int,
    gaps: float,
    month_position: Literal["start", "middle", "end"],
    date_as_str: bool,
    as_outer_type: Literal["dict", "list"] = "list",
    as_inner_type: Literal["dict[1]", "dict[2]", "list", "tuple", "dict"] = "tuple",
) -> Sequence[tuple]:
    """Create random (date, value) test data in one of several container layouts.

    Parameters:
    -----------
    frequency: str
        Frequency symbol passed to ``create_date_series``.

    eomonth: bool
        Passed through to ``create_date_series``.

    n: int
        Maximum number of dates to keep from the generated date series.

    gaps: float
        Fraction of the dates to remove at random. A falsy value removes nothing.

    month_position: 'start', 'middle' or 'end'
        Selects the first date: 1st, 15th or 31st of January 2016.

    date_as_str: bool
        If True, dates are converted to strings in the ``%Y-%m-%d`` format.

    as_outer_type: 'dict' or 'list'
        'dict' returns a single ``{date: value}`` dict; 'list' returns a list of items.

    as_inner_type: 'dict[1]', 'dict[2]', 'list' or 'tuple'
        Shape of every item when ``as_outer_type`` is 'list':
        'list' gives ``[date, value]``, 'dict[1]' gives ``{date: value}``,
        'dict[2]' gives ``{'date': date, 'value': value}``.
        Any other value, including 'tuple' and the plain 'dict' which the
        annotation used to advertise, leaves the items as ``(date, value)`` tuples.

    Returns:
    --------
        The generated data in the requested layout.
        Values are random and no seed is set, so they differ between calls.
    """
    start_dates = {
        "start": datetime.datetime(2016, 1, 1),
        "middle": datetime.datetime(2016, 1, 15),
        "end": datetime.datetime(2016, 1, 31),
    }
    end_date = datetime.datetime(2021, 12, 31)
    dates = create_date_series(start_dates[month_position], end_date, frequency=frequency, eomonth=eomonth)
    dates = dates[:n]
    if gaps:
        num_gaps = int(len(dates) * gaps)
        to_remove = random.sample(dates, num_gaps)
        for i in to_remove:
            dates.remove(i)
    if date_as_str:
        dates = [i.strftime("%Y-%m-%d") for i in dates]

    values = [random.randint(8000, 90000) / 100 for _ in dates]

    data = list(zip(dates, values))
    if as_outer_type == "list":
        if as_inner_type == "list":
            data = [list(i) for i in data]
        elif as_inner_type == "dict[1]":
            data = [dict((i,)) for i in data]
        elif as_inner_type == "dict[2]":
            data = [dict(date=i, value=j) for i, j in data]
    elif as_outer_type == "dict":
        data = dict(data)

    return data
|
||||
|
||||
|
||||
class TestDateSeries:
    """Checks for ``create_date_series`` at daily, monthly and quarterly frequency."""

    def test_daily(self):
        """Daily series: 366 dates for 2020, 365 for 2017, ValueError with eomonth."""
        leap_year_dates = create_date_series(
            datetime.datetime(2020, 1, 1), datetime.datetime(2020, 12, 31), frequency="D"
        )
        assert len(leap_year_dates) == 366

        first_day = datetime.datetime(2017, 1, 1)
        last_day = datetime.datetime(2017, 12, 31)
        common_year_dates = create_date_series(first_day, last_day, frequency="D")
        assert len(common_year_dates) == 365

        # Combining a daily frequency with eomonth=True is expected to fail.
        with pytest.raises(ValueError):
            create_date_series(first_day, last_day, frequency="D", eomonth=True)

    def test_monthly(self):
        """Monthly series: expected length and membership for several start dates."""
        year_end = datetime.datetime(2020, 12, 31)
        new_year = datetime.datetime(2020, 1, 1)

        monthly = create_date_series(new_year, year_end, frequency="M")
        assert len(monthly) == 12

        monthly = create_date_series(new_year, year_end, frequency="M", eomonth=True)
        assert datetime.datetime(2020, 2, 29) in monthly

        # Start on the 31st of January.
        monthly = create_date_series(datetime.datetime(2020, 1, 31), year_end, frequency="M")
        assert datetime.datetime(2020, 2, 29) in monthly
        assert datetime.datetime(2020, 8, 31) in monthly
        assert datetime.datetime(2020, 10, 30) not in monthly

        # Start on the 29th of February.
        monthly = create_date_series(datetime.datetime(2020, 2, 29), year_end, frequency="M")
        assert len(monthly) == 11
        assert datetime.datetime(2020, 2, 29) in monthly
        assert datetime.datetime(2020, 8, 31) not in monthly
        assert datetime.datetime(2020, 10, 29) in monthly

    def test_quarterly(self):
        """Quarterly series: expected length and membership for several start dates."""
        period_end = datetime.datetime(2020, 12, 31)
        period_start = datetime.datetime(2018, 1, 1)

        quarterly = create_date_series(period_start, period_end, frequency="Q")
        assert len(quarterly) == 12

        quarterly = create_date_series(period_start, period_end, frequency="Q", eomonth=True)
        assert datetime.datetime(2020, 4, 30) in quarterly

        quarterly = create_date_series(datetime.datetime(2020, 1, 31), period_end, frequency="Q")
        assert len(quarterly) == 4
        assert datetime.datetime(2020, 2, 29) not in quarterly
        assert max(quarterly) == datetime.datetime(2020, 10, 31)

        leap_day = datetime.datetime(2020, 2, 29)
        quarterly = create_date_series(leap_day, period_end, frequency="Q")
        assert datetime.datetime(2020, 2, 29) in quarterly
        assert datetime.datetime(2020, 8, 31) not in quarterly
        assert datetime.datetime(2020, 11, 29) in quarterly

        quarterly = create_date_series(leap_day, period_end, frequency="Q", eomonth=True)
        assert datetime.datetime(2020, 11, 30) in quarterly
|
||||
|
||||
|
||||
class TestFincal:
    """Construction, forward-fill and slicing checks for ``TimeSeries``."""

    def test_creation(self):
        """Series length and frequency after construction, with and without gaps."""
        shared = dict(frequency='D', eomonth=False, month_position='start', date_as_str=True)

        series = TimeSeries(create_test_data(n=50, gaps=0, **shared), frequency="D")
        assert len(series) == 50
        assert isinstance(series.frequency, Frequency)
        assert series.frequency.days == 1

        filled = series.ffill()
        assert len(filled) == 50

        # 10% of 500 generated dates are removed before construction.
        series = TimeSeries(create_test_data(n=500, gaps=0.1, **shared), frequency="D")
        assert len(series) == 450

    def test_ffill(self):
        """``ffill`` returns a longer series; with ``inplace=True`` it returns None."""
        raw = create_test_data(
            frequency='D', eomonth=False, n=500, gaps=0.1, month_position='start', date_as_str=True
        )
        series = TimeSeries(raw, frequency="D")

        filled = series.ffill()
        assert len(filled) > 498

        outcome = series.ffill(inplace=True)
        assert outcome is None
        assert len(series) > 498

    def test_slicing(self):
        """Integer indexing yields a tuple, slicing yields a list."""
        raw = create_test_data(
            frequency='D', eomonth=False, n=50, gaps=0, month_position='start', date_as_str=True
        )
        series = TimeSeries(raw, frequency="D")

        for key in (0, slice(None, 3), slice(5, 7)):
            assert series[key] is not None

        assert isinstance(series[0], tuple)
        assert isinstance(series[10:20], list)
        assert len(series[10:20]) == 10
|
588
tests/test_pyfacts.py
Normal file
588
tests/test_pyfacts.py
Normal file
@ -0,0 +1,588 @@
|
||||
import datetime
|
||||
|
||||
import pytest
|
||||
|
||||
from pyfacts import (
|
||||
AllFrequencies,
|
||||
Frequency,
|
||||
PyfactsOptions,
|
||||
TimeSeries,
|
||||
create_date_series,
|
||||
)
|
||||
from pyfacts.exceptions import DateNotFoundError
|
||||
|
||||
|
||||
class TestDateSeries:
    """Checks for ``create_date_series`` at daily, monthly and quarterly frequency."""

    def test_daily(self):
        """Daily series: 366 dates for 2020, 365 for 2017, ValueError with eomonth."""
        dates_2020 = create_date_series(
            datetime.datetime(2020, 1, 1), datetime.datetime(2020, 12, 31), frequency="D"
        )
        assert len(dates_2020) == 366

        jan_first = datetime.datetime(2017, 1, 1)
        dec_last = datetime.datetime(2017, 12, 31)
        dates_2017 = create_date_series(jan_first, dec_last, frequency="D")
        assert len(dates_2017) == 365

        # Combining a daily frequency with eomonth=True is expected to fail.
        with pytest.raises(ValueError):
            create_date_series(jan_first, dec_last, frequency="D", eomonth=True)

    def test_monthly(self):
        """Monthly series: expected length and membership for several start dates."""
        year_end = datetime.datetime(2020, 12, 31)
        new_year = datetime.datetime(2020, 1, 1)

        months = create_date_series(new_year, year_end, frequency="M", ensure_coverage=False)
        assert len(months) == 12

        months = create_date_series(new_year, year_end, frequency="M", eomonth=True)
        assert datetime.datetime(2020, 2, 29) in months

        # Start on the 31st of January.
        months = create_date_series(datetime.datetime(2020, 1, 31), year_end, frequency="M")
        assert datetime.datetime(2020, 2, 29) in months
        assert datetime.datetime(2020, 8, 31) in months
        assert datetime.datetime(2020, 10, 30) not in months

        # Start on the 29th of February.
        months = create_date_series(datetime.datetime(2020, 2, 29), year_end, frequency="M")
        assert len(months) == 11
        assert datetime.datetime(2020, 2, 29) in months
        assert datetime.datetime(2020, 8, 31) not in months
        assert datetime.datetime(2020, 10, 29) in months

    def test_quarterly(self):
        """Quarterly series: expected length and membership for several start dates."""
        period_end = datetime.datetime(2020, 12, 31)
        period_start = datetime.datetime(2018, 1, 1)

        quarters = create_date_series(period_start, period_end, frequency="Q")
        assert len(quarters) == 12

        quarters = create_date_series(period_start, period_end, frequency="Q", eomonth=True)
        assert datetime.datetime(2020, 4, 30) in quarters

        quarters = create_date_series(datetime.datetime(2020, 1, 31), period_end, frequency="Q")
        assert len(quarters) == 4
        assert datetime.datetime(2020, 2, 29) not in quarters
        assert max(quarters) == datetime.datetime(2020, 10, 31)

        leap_day = datetime.datetime(2020, 2, 29)
        quarters = create_date_series(leap_day, period_end, frequency="Q")
        assert datetime.datetime(2020, 2, 29) in quarters
        assert datetime.datetime(2020, 8, 31) not in quarters
        assert datetime.datetime(2020, 11, 29) in quarters

        quarters = create_date_series(leap_day, period_end, frequency="Q", eomonth=True)
        assert datetime.datetime(2020, 11, 30) in quarters
|
||||
|
||||
|
||||
class TestTimeSeriesCreation:
    """Construct ``TimeSeries`` from the different supported input shapes.

    The membership checks below were previously bare expressions
    (``datetime(...) in ts``) whose result was discarded; they are now real
    assertions. They rely on the ``create_test_data`` fixture producing
    2017-01-01 as a date — presumably its first date, since other tests in
    this module look up 2017-01-xx entries; verify against conftest.
    """

    def test_creation_with_list_of_tuples(self, create_test_data):
        """A list of (datetime, value) tuples yields a daily series of 50 points."""
        ts_data = create_test_data(frequency=AllFrequencies.D, num=50)
        ts = TimeSeries(ts_data, frequency="D")
        assert len(ts) == 50
        assert isinstance(ts.frequency, Frequency)
        assert ts.frequency.days == 1

    def test_creation_with_string_dates(self, create_test_data):
        """String dates are parsed with the default and with explicit date formats."""
        ts_data = create_test_data(frequency=AllFrequencies.D, num=50)

        # Default format: no date_format argument is passed.
        ts_data1 = [(dt.strftime("%Y-%m-%d"), val) for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D")
        assert datetime.datetime(2017, 1, 1) in ts

        ts_data1 = [(dt.strftime("%d-%m-%Y"), val) for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D", date_format="%d-%m-%Y")
        assert datetime.datetime(2017, 1, 1) in ts

        ts_data1 = [(dt.strftime("%m-%d-%Y"), val) for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D", date_format="%m-%d-%Y")
        assert datetime.datetime(2017, 1, 1) in ts

        ts_data1 = [(dt.strftime("%m-%d-%Y %H:%M"), val) for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D", date_format="%m-%d-%Y %H:%M")
        assert datetime.datetime(2017, 1, 1, 0, 0) in ts

    def test_creation_with_list_of_dicts(self, create_test_data):
        """A list of ``{"date": ..., "value": ...}`` dicts is accepted."""
        ts_data = create_test_data(frequency=AllFrequencies.D, num=50)
        ts_data1 = [{"date": dt.strftime("%Y-%m-%d"), "value": val} for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D")
        assert datetime.datetime(2017, 1, 1) in ts

    def test_creation_with_list_of_lists(self, create_test_data):
        """A list of ``[date, value]`` lists is accepted."""
        ts_data = create_test_data(frequency=AllFrequencies.D, num=50)
        ts_data1 = [[dt.strftime("%Y-%m-%d"), val] for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D")
        assert datetime.datetime(2017, 1, 1) in ts

    def test_creation_with_dict(self, create_test_data):
        """A list of single-entry ``{date: value}`` dicts is accepted."""
        ts_data = create_test_data(frequency=AllFrequencies.D, num=50)
        ts_data1 = [{dt.strftime("%Y-%m-%d"): val} for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D")
        assert datetime.datetime(2017, 1, 1) in ts
|
||||
|
||||
|
||||
class TestTimeSeriesBasics:
    """Forward- and backward-fill behaviour of ``TimeSeries`` at several frequencies."""

    def test_fill(self, create_test_data):
        """ffill/bfill on weekday-only daily data, plus a tiny hand-written series."""
        # NOTE(review): this changes a global option and does not restore it;
        # later tests may depend on the value — confirm before changing.
        PyfactsOptions.get_closest = "exact"
        ts_data = create_test_data(frequency=AllFrequencies.D, num=50, skip_weekends=True)
        ts = TimeSeries(ts_data, frequency="D")
        ffill_data = ts.ffill()
        assert len(ffill_data) == 68

        ffill_data = ts.ffill(inplace=True)
        assert ffill_data is None
        assert len(ts) == 68

        ts_data = create_test_data(frequency=AllFrequencies.D, num=50, skip_weekends=True)
        ts = TimeSeries(ts_data, frequency="D")
        bfill_data = ts.bfill()
        assert len(bfill_data) == 68

        bfill_data = ts.bfill(inplace=True)
        assert bfill_data is None
        assert len(ts) == 68

        # 2021-01-03 is missing: ffill copies the 2nd, bfill copies the 4th.
        data = [("2021-01-01", 220), ("2021-01-02", 230), ("2021-01-04", 240)]
        ts = TimeSeries(data, frequency="D")
        ff = ts.ffill()
        assert ff["2021-01-03"][1] == 230

        bf = ts.bfill()
        assert bf["2021-01-03"][1] == 240

    def test_fill_weekly(self, create_test_data):
        """Two removed weekly points are restored by ffill and bfill."""
        ts_data = create_test_data(frequency=AllFrequencies.W, num=10)
        # pop(2) then pop(6) removes the original elements at index 2 and 7.
        ts_data.pop(2)
        ts_data.pop(6)
        ts = TimeSeries(ts_data, frequency="W")
        assert len(ts) == 8

        ff = ts.ffill()
        assert len(ff) == 10
        assert "2017-01-15" in ff
        assert ff["2017-01-15"][1] == ff["2017-01-08"][1]

        bf = ts.bfill()
        # Fixed: this used to re-check len(ff) instead of the bfill result.
        assert len(bf) == 10
        assert "2017-01-15" in bf
        assert bf["2017-01-15"][1] == bf["2017-01-22"][1]

    def test_fill_monthly(self, create_test_data):
        """Two removed monthly points are restored by ffill and bfill."""
        ts_data = create_test_data(frequency=AllFrequencies.M, num=10)
        # pop(2) then pop(6) removes the original elements at index 2 and 7.
        ts_data.pop(2)
        ts_data.pop(6)
        ts = TimeSeries(ts_data, frequency="M")
        assert len(ts) == 8

        ff = ts.ffill()
        assert len(ff) == 10
        assert "2017-03-01" in ff
        assert ff["2017-03-01"][1] == ff["2017-02-01"][1]

        bf = ts.bfill()
        assert len(bf) == 10
        assert "2017-08-01" in bf
        assert bf["2017-08-01"][1] == bf["2017-09-01"][1]

    def test_fill_eomonthly(self, create_test_data):
        """Same as the monthly case, with data generated using ``eomonth=True``."""
        ts_data = create_test_data(frequency=AllFrequencies.M, num=10, eomonth=True)
        # pop(2) then pop(6) removes the original elements at index 2 and 7.
        ts_data.pop(2)
        ts_data.pop(6)
        ts = TimeSeries(ts_data, frequency="M")
        assert len(ts) == 8

        ff = ts.ffill()
        assert len(ff) == 10
        assert "2017-03-31" in ff
        assert ff["2017-03-31"][1] == ff["2017-02-28"][1]

        bf = ts.bfill()
        assert len(bf) == 10
        assert "2017-08-31" in bf
        assert bf["2017-08-31"][1] == bf["2017-09-30"][1]

    def test_fill_quarterly(self, create_test_data):
        """Two removed quarterly points are restored by ffill and bfill."""
        ts_data = create_test_data(frequency=AllFrequencies.Q, num=10, eomonth=True)
        # pop(2) then pop(6) removes the original elements at index 2 and 7.
        ts_data.pop(2)
        ts_data.pop(6)
        ts = TimeSeries(ts_data, frequency="Q")
        assert len(ts) == 8

        ff = ts.ffill()
        assert len(ff) == 10
        assert "2017-07-31" in ff
        assert ff["2017-07-31"][1] == ff["2017-04-30"][1]

        bf = ts.bfill()
        assert len(bf) == 10
        assert "2018-10-31" in bf
        assert bf["2018-10-31"][1] == bf["2019-01-31"][1]
|
||||
|
||||
|
||||
class TestReturns:
    """``TimeSeries.calculate_returns`` on generated daily data."""

    def test_returns_calc(self, create_test_data):
        """Point-to-point returns for several periods, plus exact-match failures."""
        ts_data = create_test_data(AllFrequencies.D, skip_weekends=True)
        ts = TimeSeries(ts_data, "D")

        # (as-on date, annualised?, period unit, period value, expected return)
        cases = [
            ("2020-01-01", False, "years", 1, 0.112913),
            ("2020-04-01", False, "months", 3, 0.015908),
            ("2020-04-01", True, "months", 3, 0.065167),
            ("2020-04-01", False, "days", 90, 0.017673),
            ("2020-04-01", True, "days", 90, 0.073632),
        ]
        for as_on, compounded, unit, value, expected in cases:
            returns = ts.calculate_returns(
                as_on, annual_compounded_returns=compounded, return_period_unit=unit, return_period_value=value
            )
            assert round(returns[1], 6) == expected

        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-04", return_period_unit="days", return_period_value=90, as_on_match="exact")
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-08", return_period_unit="months", return_period_value=1, prior_match="exact")

    def test_date_formats(self, create_test_data):
        """The global ``PyfactsOptions.date_format`` controls string-date parsing.

        The global option is restored in a ``finally`` block so a failure here
        cannot leak a non-default date format into other tests.
        """
        ts_data = create_test_data(AllFrequencies.D, skip_weekends=True)
        ts = TimeSeries(ts_data, "D")
        previous_format = PyfactsOptions.date_format
        try:
            PyfactsOptions.date_format = "%d-%m-%Y"
            with pytest.raises(ValueError):
                ts.calculate_returns(
                    "2020-04-10", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
                )

            returns1 = ts.calculate_returns(
                "2020-04-01", return_period_unit="days", return_period_value=90, date_format="%Y-%m-%d"
            )
            returns2 = ts.calculate_returns("01-04-2020", return_period_unit="days", return_period_value=90)
            assert round(returns1[1], 6) == round(returns2[1], 6) == 0.073632

            PyfactsOptions.date_format = "%m-%d-%Y"
            with pytest.raises(ValueError):
                ts.calculate_returns(
                    "2020-04-01", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
                )

            returns1 = ts.calculate_returns(
                "2020-04-01", return_period_unit="days", return_period_value=90, date_format="%Y-%m-%d"
            )
            returns2 = ts.calculate_returns("04-01-2020", return_period_unit="days", return_period_value=90)
            assert round(returns1[1], 6) == round(returns2[1], 6) == 0.073632
        finally:
            PyfactsOptions.date_format = previous_format

    def test_limits(self, create_test_data):
        """A prior date further away than ``closest_max_days`` raises DateNotFoundError."""
        PyfactsOptions.date_format = "%Y-%m-%d"
        ts_data = create_test_data(AllFrequencies.D)
        ts = TimeSeries(ts_data, "D")
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-11-25", return_period_unit="days", return_period_value=90, closest_max_days=10)

    def test_rolling_returns(self):
        """Placeholder for rolling-returns tests (to do).

        The body is intentionally empty: pytest warns when a test returns a
        non-None value, so the former ``return True`` was removed.
        """
|
||||
|
||||
|
||||
class TestExpand:
    """``TimeSeries.expand`` to a higher frequency using forward fill."""

    def test_weekly_to_daily(self, create_test_data):
        """10 weekly points expand to 64 daily points."""
        weekly = TimeSeries(create_test_data(AllFrequencies.W, num=10), "W")
        result = weekly.expand("D", "ffill")
        assert len(result) == 64
        assert result.frequency.name == "daily"
        # The first two expanded points carry the same value.
        assert result.iloc[0][1] == result.iloc[1][1]

    def test_weekly_to_daily_no_weekends(self, create_test_data):
        """10 weekly points expand to 46 daily points when weekends are skipped."""
        weekly = TimeSeries(create_test_data(AllFrequencies.W, num=10), "W")
        result = weekly.expand("D", "ffill", skip_weekends=True)
        assert len(result) == 46
        assert result.frequency.name == "daily"
        assert result.iloc[0][1] == result.iloc[1][1]

    def test_monthly_to_daily(self, create_test_data):
        """6 monthly points expand to 152 daily points."""
        monthly = TimeSeries(create_test_data(AllFrequencies.M, num=6), "M")
        result = monthly.expand("D", "ffill")
        assert len(result) == 152
        assert result.frequency.name == "daily"
        assert result.iloc[0][1] == result.iloc[1][1]

    def test_monthly_to_daily_no_weekends(self, create_test_data):
        """6 monthly points expand to 109 daily points when weekends are skipped."""
        monthly = TimeSeries(create_test_data(AllFrequencies.M, num=6), "M")
        result = monthly.expand("D", "ffill", skip_weekends=True)
        assert len(result) == 109
        assert result.frequency.name == "daily"
        assert result.iloc[0][1] == result.iloc[1][1]

    def test_monthly_to_weekly(self, create_test_data):
        """6 monthly points expand to 23 weekly points."""
        monthly = TimeSeries(create_test_data(AllFrequencies.M, num=6), "M")
        result = monthly.expand("W", "ffill")
        assert len(result) == 23
        assert result.frequency.name == "weekly"
        assert result.iloc[0][1] == result.iloc[1][1]

    def test_yearly_to_monthly(self, create_test_data):
        """5 yearly points expand to 49 monthly points."""
        yearly = TimeSeries(create_test_data(AllFrequencies.Y, num=5), "Y")
        result = yearly.expand("M", "ffill")
        assert len(result) == 49
        assert result.frequency.name == "monthly"
        assert result.iloc[0][1] == result.iloc[1][1]
|
||||
|
||||
|
||||
class TestShrink:
    """``TimeSeries.shrink`` to a lower frequency."""

    def test_daily_to_smaller(self, create_test_data):
        """1000 daily points shrink to 144 weekly and 34 monthly points."""
        daily = TimeSeries(create_test_data(AllFrequencies.D, num=1000), "D")
        weekly = daily.shrink("W", "ffill")
        monthly = daily.shrink("M", "ffill")
        assert len(weekly) == 144
        assert len(monthly) == 34

    def test_weekly_to_smaller(self, create_test_data):
        """Weekly data shrinks to monthly; shrinking to daily raises ValueError."""
        weekly = TimeSeries(create_test_data(AllFrequencies.W, num=300), "W")

        monthly = weekly.shrink("M", "ffill")
        assert len(monthly) == 70

        month_end = weekly.shrink("M", "ffill", eomonth=True)
        assert len(month_end) == 69

        # Daily is a higher frequency than weekly, so this is not a shrink.
        with pytest.raises(ValueError):
            weekly.shrink("D", "ffill")
|
||||
|
||||
|
||||
class TestMeanReturns:
    """Placeholder: no tests have been written for this class yet."""

    # TODO
|
||||
|
||||
|
||||
class TestReadCsv:
    """Placeholder: no tests have been written for this class yet."""

    # TODO
|
||||
|
||||
|
||||
class TestTransform:
    """``TimeSeries.transform`` with the ``"mean"`` aggregation across frequencies."""

    def test_daily_to_weekly(self, create_test_data):
        """Weekday-only daily data averaged into weekly buckets."""
        raw = create_test_data(AllFrequencies.D, num=782, skip_weekends=True)
        daily = TimeSeries(raw, "D")
        weekly = daily.transform("W", "mean", ensure_coverage=False)
        assert isinstance(weekly, TimeSeries)
        assert len(weekly) == 157
        assert "2017-01-30" in weekly
        expected_point = (datetime.datetime(2017, 1, 30), 1020.082)
        assert weekly.iloc[4] == expected_point

    def test_daily_to_monthly(self, create_test_data):
        """Daily data (weekends included) averaged into monthly buckets."""
        raw = create_test_data(AllFrequencies.D, num=782, skip_weekends=False)
        daily = TimeSeries(raw, "D")
        monthly = daily.transform("M", "mean")
        assert isinstance(monthly, TimeSeries)
        assert len(monthly) == 27
        assert "2018-01-01" in monthly
        assert round(monthly.iloc[12][1], 2) == 1146.91

    def test_daily_to_yearly(self, create_test_data):
        """Weekday-only daily data averaged into yearly buckets."""
        raw = create_test_data(AllFrequencies.D, num=782, skip_weekends=True)
        daily = TimeSeries(raw, "D")
        yearly = daily.transform("Y", "mean")
        assert isinstance(yearly, TimeSeries)
        assert len(yearly) == 4
        assert "2019-01-02" in yearly
        expected_point = (datetime.datetime(2019, 1, 2), 1157.2835632183908)
        assert yearly.iloc[2] == expected_point

    def test_weekly_to_monthly(self, create_test_data):
        """Weekly data averaged into monthly buckets."""
        raw = create_test_data(AllFrequencies.W, num=261)
        weekly = TimeSeries(raw, "W")
        monthly = weekly.transform("M", "mean")
        assert isinstance(monthly, TimeSeries)
        assert "2017-01-01" in monthly
        expected_point = (datetime.datetime(2017, 2, 1), 1008.405)
        assert monthly.iloc[1] == expected_point

    def test_weekly_to_qty(self, create_test_data):
        """Weekly data averaged into quarterly buckets."""
        raw = create_test_data(AllFrequencies.W, num=261)
        weekly = TimeSeries(raw, "W")
        quarterly = weekly.transform("Q", "mean")
        assert len(quarterly) == 21
        assert "2018-01-01" in quarterly
        assert round(quarterly.iloc[4][1], 2) == 1032.01

    def test_weekly_to_yearly(self, create_test_data):
        """Weekly data averaged into yearly buckets; weekly -> daily is rejected."""
        raw = create_test_data(AllFrequencies.W, num=261)
        weekly = TimeSeries(raw, "W")
        yearly = weekly.transform("Y", "mean")
        assert "2019-01-01" in yearly
        assert round(yearly.iloc[2][1], 2) == 1053.70

        with pytest.raises(ValueError):
            weekly.transform("D", "mean")

    def test_monthly_to_qty(self, create_test_data):
        """Monthly data averaged into quarterly buckets; monthly -> monthly "sum" is rejected."""
        raw = create_test_data(AllFrequencies.M, num=36)
        monthly = TimeSeries(raw, "M")
        quarterly = monthly.transform("Q", "mean")
        assert len(quarterly) == 13
        assert "2018-10-01" in quarterly
        expected_point = (datetime.datetime(2018, 10, 1), 1022.6466666666666)
        assert quarterly.iloc[7] == expected_point

        with pytest.raises(ValueError):
            monthly.transform("M", "sum")
|
||||
|
||||
|
||||
class TestReturnsAgain:
    """``TimeSeries.calculate_returns`` on a small hand-written monthly series."""

    # Monthly values rising by 2 per month, from 10 (2020-01-01) to 34 (2021-01-01).
    data = [
        ("2020-01-01", 10),
        ("2020-02-01", 12),
        ("2020-03-01", 14),
        ("2020-04-01", 16),
        ("2020-05-01", 18),
        ("2020-06-01", 20),
        ("2020-07-01", 22),
        ("2020-08-01", 24),
        ("2020-09-01", 26),
        ("2020-10-01", 28),
        ("2020-11-01", 30),
        ("2020-12-01", 32),
        ("2021-01-01", 34),
    ]

    def test_returns_calc(self):
        """Returns for several periods, plus exact-match failures."""
        ts = TimeSeries(self.data, frequency="M")

        returns = ts.calculate_returns(
            "2021-01-01", annual_compounded_returns=False, return_period_unit="years", return_period_value=1
        )
        assert returns[1] == 2.4

        # (as-on date, annualised?, period unit, period value, expected return)
        cases = [
            ("2020-04-01", False, "months", 3, 0.6),
            ("2020-04-01", True, "months", 3, 5.5536),
            ("2020-04-01", False, "days", 90, 0.6),
            ("2020-04-01", True, "days", 90, 5.727),
            ("2020-04-10", True, "days", 90, 5.727),
        ]
        for as_on, compounded, unit, value, expected in cases:
            returns = ts.calculate_returns(
                as_on, annual_compounded_returns=compounded, return_period_unit=unit, return_period_value=value
            )
            assert round(returns[1], 4) == expected

        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-10", return_period_unit="days", return_period_value=90, as_on_match="exact")
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-10", return_period_unit="days", return_period_value=90, prior_match="exact")

    def test_date_formats(self):
        """The global ``PyfactsOptions.date_format`` controls string-date parsing.

        The global option is restored in a ``finally`` block so a failure here
        cannot leak a non-default date format into other tests.
        """
        ts = TimeSeries(self.data, frequency="M")
        previous_format = PyfactsOptions.date_format
        try:
            PyfactsOptions.date_format = "%d-%m-%Y"
            with pytest.raises(ValueError):
                ts.calculate_returns(
                    "2020-04-10", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
                )

            returns1 = ts.calculate_returns(
                "2020-04-10", return_period_unit="days", return_period_value=90, date_format="%Y-%m-%d"
            )
            returns2 = ts.calculate_returns("10-04-2020", return_period_unit="days", return_period_value=90)
            assert round(returns1[1], 4) == round(returns2[1], 4) == 5.727

            PyfactsOptions.date_format = "%m-%d-%Y"
            with pytest.raises(ValueError):
                ts.calculate_returns(
                    "2020-04-10", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
                )

            returns1 = ts.calculate_returns(
                "2020-04-10", return_period_unit="days", return_period_value=90, date_format="%Y-%m-%d"
            )
            returns2 = ts.calculate_returns("04-10-2020", return_period_unit="days", return_period_value=90)
            assert round(returns1[1], 4) == round(returns2[1], 4) == 5.727
        finally:
            PyfactsOptions.date_format = previous_format

    def test_limits(self):
        """A prior date further away than ``closest_max_days`` raises DateNotFoundError."""
        ts = TimeSeries(self.data, frequency="M")
        PyfactsOptions.date_format = "%Y-%m-%d"
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-25", return_period_unit="days", return_period_value=90, closest_max_days=10)
|
||||
|
||||
|
||||
class TestVolatility:
    """``TimeSeries.volatility`` on generated daily data."""

    def test_daily_ts(self, create_test_data):
        """Volatility for a range of option combinations, checked in sequence."""
        series = TimeSeries(create_test_data(AllFrequencies.D), "D")
        assert len(series) == 1000

        # (keyword arguments, rounding digits, expected value)
        scenarios = [
            (dict(annualize_volatility=False), 6, 0.002622),
            (dict(), 6, 0.050098),
            (dict(annual_compounded_returns=True), 4, 37.9329),
            (dict(return_period_unit="months", annual_compounded_returns=True), 4, 0.6778),
            (dict(return_period_unit="years"), 6, 0.023164),
            (dict(from_date="2017-10-01", to_date="2019-08-31", annualize_volatility=True), 6, 0.050559),
            (dict(from_date="2017-02-01", frequency="M", return_period_unit="months"), 6, 0.050884),
            (
                dict(
                    frequency="M",
                    return_period_unit="months",
                    return_period_value=3,
                    annualize_volatility=False,
                ),
                6,
                0.020547,
            ),
        ]
        for options, digits, expected in scenarios:
            deviation = series.volatility(**options)
            assert round(deviation, digits) == expected
|
||||
|
||||
|
||||
class TestDrawdown:
    """``TimeSeries.max_drawdown`` on generated daily and weekly data."""

    def test_daily_ts(self, create_test_data):
        """The drawdown result is a 3-key dict with the expected dates and value."""
        series = TimeSeries(create_test_data(AllFrequencies.D, skip_weekends=True), "D")
        drawdown = series.max_drawdown()

        assert isinstance(drawdown, dict)
        assert len(drawdown) == 3
        assert all(key in drawdown for key in ["start_date", "end_date", "drawdown"])

        expected_response = {
            "start_date": datetime.datetime(2017, 6, 6, 0, 0),
            "end_date": datetime.datetime(2017, 7, 31, 0, 0),
            "drawdown": -0.028293686030751997,
        }
        assert drawdown == expected_response

    def test_weekly_ts(self, create_test_data):
        """Same checks on weekly data generated with ``mu=1`` and ``sigma=0.5``."""
        series = TimeSeries(create_test_data(AllFrequencies.W, mu=1, sigma=0.5), "W")
        drawdown = series.max_drawdown()

        assert isinstance(drawdown, dict)
        assert len(drawdown) == 3
        assert all(key in drawdown for key in ["start_date", "end_date", "drawdown"])

        expected_response = {
            "start_date": datetime.datetime(2019, 2, 17, 0, 0),
            "end_date": datetime.datetime(2019, 11, 17, 0, 0),
            "drawdown": -0.2584760499552089,
        }
        assert drawdown == expected_response
|
||||
|
||||
|
||||
class TestSync:
    """``TimeSeries.sync``: align one series to the dates of another."""

    def test_weekly_to_daily(self, create_test_data):
        """A weekly series synced to a daily one has daily frequency and equal length."""
        # Both raw data sets are generated before either series is built.
        raw_daily = create_test_data(AllFrequencies.D, num=15)
        raw_weekly = create_test_data(AllFrequencies.W, num=3)

        reference = TimeSeries(raw_daily, frequency="D")
        weekly = TimeSeries(raw_weekly, frequency="W")

        synced = reference.sync(weekly)
        assert len(reference) == len(synced)
        assert synced.frequency == AllFrequencies.D
        assert "2017-01-02" in synced
        # The day after the first weekly point carries that point's value.
        assert synced["2017-01-02"][1] == synced["2017-01-01"][1]
|
172
tests/test_stats.py
Normal file
172
tests/test_stats.py
Normal file
@ -0,0 +1,172 @@
|
||||
import pyfacts as pft
|
||||
|
||||
|
||||
def test_conf(conf_fun):
    """Check that the callable provided by the ``conf_fun`` fixture returns 6 for (2, 4)."""
    assert conf_fun(2, 4) == 6
|
||||
|
||||
|
||||
class TestSharpe:
    """``pft.sharpe_ratio`` on generated daily and weekly series."""

    def test_sharpe_daily_freq(self, create_test_data):
        """Sharpe ratio on weekday-only daily data for several return periods."""
        raw = create_test_data(num=1305, frequency=pft.AllFrequencies.D, skip_weekends=True)
        series = pft.TimeSeries(raw, "D")

        # (from_date, return period unit, return period value, expected ratio)
        scenarios = [
            ("2017-02-04", "months", 1, 1.0502),
            ("2017-01-09", "days", 7, 1.0701),
            ("2018-01-02", "years", 1, 1.4374),
            ("2017-07-03", "months", 6, 0.8401),
        ]
        for start, unit, value, expected in scenarios:
            ratio = pft.sharpe_ratio(
                series,
                risk_free_rate=0.06,
                from_date=start,
                to_date="2021-12-31",
                return_period_unit=unit,
                return_period_value=value,
            )
            assert round(ratio, 4) == expected

    def test_sharpe_weekly_freq(self, create_test_data):
        """Sharpe ratio on weekly data generated with ``mu=0.6`` and ``sigma=0.7``."""
        raw = create_test_data(num=261, frequency=pft.AllFrequencies.W, mu=0.6, sigma=0.7)
        series = pft.TimeSeries(raw, "W")

        # (from_date, return period unit, return period value, expected ratio)
        scenarios = [
            ("2017-01-08", "days", 7, 0.4533),
            ("2017-02-05", "months", 1, 0.4898),
            ("2018-01-01", "months", 12, 0.3199),
        ]
        for start, unit, value, expected in scenarios:
            ratio = pft.sharpe_ratio(
                series,
                risk_free_rate=0.052,
                from_date=start,
                to_date="2021-12-31",
                return_period_unit=unit,
                return_period_value=value,
            )
            assert round(ratio, 4) == expected
|
||||
|
||||
|
||||
class TestSortino:
    """``pft.sortino_ratio`` on generated daily and weekly series."""

    def test_sortino_daily_freq(self, create_test_data):
        """Sortino ratio on daily data generated with ``mu=0.12`` and ``sigma=0.12``."""
        raw = create_test_data(num=3600, frequency=pft.AllFrequencies.D, mu=0.12, sigma=0.12)
        series = pft.TimeSeries(raw, "D")

        # (keyword arguments, expected ratio)
        scenarios = [
            (
                dict(
                    risk_free_rate=0.06 / 12,
                    from_date="2017-02-02",
                    return_period_unit="months",
                    return_period_value=1,
                ),
                1.625,
            ),
            (
                dict(
                    risk_free_rate=0.06,
                    from_date="2018-01-02",
                    return_period_unit="years",
                    return_period_value=1,
                ),
                1.2564,
            ),
        ]
        for options, expected in scenarios:
            ratio = pft.sortino_ratio(series, **options)
            assert round(ratio, 4) == expected

    def test_sortino_weekly_freq(self, create_test_data):
        """Sortino ratio on weekly data generated with ``mu=0.12`` and ``sigma=0.06``."""
        raw = create_test_data(num=500, frequency=pft.AllFrequencies.W, mu=0.12, sigma=0.06)
        series = pft.TimeSeries(raw, "W")

        # (keyword arguments, expected ratio)
        scenarios = [
            (
                dict(
                    risk_free_rate=0.06,
                    return_period_unit="years",
                    return_period_value=1,
                ),
                -5.5233,
            ),
            (
                dict(
                    risk_free_rate=0.052,
                    from_date="2017-02-05",
                    to_date="2021-12-31",
                    return_period_unit="months",
                    return_period_value=1,
                ),
                -1.93,
            ),
            (
                dict(
                    risk_free_rate=0.052,
                    from_date="2018-01-01",
                    to_date="2021-12-31",
                    return_period_unit="months",
                    return_period_value=12,
                ),
                -3.9805,
            ),
        ]
        for options, expected in scenarios:
            ratio = pft.sortino_ratio(series, **options)
            assert round(ratio, 4) == expected
|
||||
|
||||
|
||||
class TestBeta:
    """Checks ``pft.beta`` against pinned values, rounded to 4 decimals."""

    @staticmethod
    def _stock_and_market(create_test_data):
        """Build the ``(stock, market)`` pair of daily ``TimeSeries`` used by every test.

        Market data is generated before stock data, and the stock series is
        constructed before the market series, so the sequence of calls stays
        the same for every test that uses this helper.
        """
        market_raw = create_test_data(num=3600, frequency=pft.AllFrequencies.D)
        stock_raw = create_test_data(
            num=3600, frequency=pft.AllFrequencies.D, mu=0.12, sigma=0.08
        )
        stock = pft.TimeSeries(stock_raw, "D")
        market = pft.TimeSeries(market_raw, "D")
        return stock, market

    def test_beta_daily_freq(self, create_test_data):
        """Daily frequency with explicit one-day return periods."""
        stock, market = self._stock_and_market(create_test_data)
        result = pft.beta(
            stock,
            market,
            frequency="D",
            return_period_unit="days",
            return_period_value=1,
        )
        assert round(result, 4) == 1.5997

    def test_beta_daily_freq_daily_returns(self, create_test_data):
        """All optional arguments left at their defaults."""
        stock, market = self._stock_and_market(create_test_data)
        result = pft.beta(stock, market)
        assert round(result, 4) == 1.6287

    def test_beta_monthly_freq(self, create_test_data):
        """Monthly frequency with the return period left at its default."""
        stock, market = self._stock_and_market(create_test_data)
        result = pft.beta(stock, market, frequency="M")
        assert round(result, 4) == 1.6131

    def test_beta_monthly_freq_monthly_returns(self, create_test_data):
        """Monthly frequency with explicit one-month return periods."""
        stock, market = self._stock_and_market(create_test_data)
        result = pft.beta(
            stock,
            market,
            frequency="M",
            return_period_unit="months",
            return_period_value=1,
        )
        assert round(result, 4) == 1.5887
|
26
tests/test_utils.py
Normal file
26
tests/test_utils.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import datetime
|
||||
|
||||
import pytest
|
||||
from pyfacts.utils import _interval_to_years, _parse_date
|
||||
|
||||
|
||||
class TestParseDate:
    """Tests for ``_parse_date`` covering accepted inputs and rejected strings."""

    def test_parsing(self):
        """Each supported input form must resolve to 2020-01-01 00:00."""
        expected = datetime.datetime(2020, 1, 1)

        # (value, keyword arguments) pairs, evaluated in order.
        # "01-01-2020" reads identically as day-first and month-first, so both
        # explicit formats are expected to give the same result.
        cases = [
            (expected, {}),
            (expected.strftime("%Y-%m-%d"), {}),
            (datetime.date(2020, 1, 1), {}),
            ("01-01-2020", {"date_format": "%d-%m-%Y"}),
            ("01-01-2020", {"date_format": "%m-%d-%Y"}),
        ]
        for value, options in cases:
            assert _parse_date(value, **options) == expected

    def test_errors(self):
        """Strings passed without a ``date_format`` that must raise ``ValueError``."""
        # NOTE(review): presumably these fail because they do not match the
        # parser's default format - confirm against _parse_date.
        for bad_input in ("01-01-2020", "abcdefg"):
            with pytest.raises(ValueError):
                _parse_date(bad_input)
|
||||
|
||||
|
||||
class TestIntervalToYears:
    """Tests for ``_interval_to_years``."""

    def test_months(self):
        """Six months must convert to exactly half a year."""
        years = _interval_to_years("months", 6)
        assert years == 0.5
|
Loading…
Reference in New Issue
Block a user