Compare commits

...

116 Commits

Author SHA1 Message Date
173fba0f03 added more python versions 2024-08-13 22:14:58 +05:30
a4ecd38b97 All transform tests are now passing 2023-08-20 14:18:36 +05:30
3e5875b873 Merge branch 'master' of http://192.168.0.114:3000/buddy/fincal 2023-08-19 22:23:41 +05:30
2f50894b46 updated some tests 2023-08-19 22:23:38 +05:30
Gourav Kumar
06be27d46c Added ensure_coverage parameter to transform function 2023-07-17 10:22:43 +05:30
c453ff20e5 Merge branch 'master' of http://192.168.0.114:3000/buddy/fincal 2023-07-16 19:54:23 +05:30
a455cdfc65 updated tests, all are accurate and passing 2023-07-16 19:54:14 +05:30
f317a93bfe rewrote generate_date_series using while loop 2023-07-16 19:53:24 +05:30
Gourav Kumar
14afb1400a Corrected date getting missed in transform 2023-03-26 20:33:01 +05:30
0bab00f455 added eomonth to transform function 2023-03-26 13:24:49 +05:30
56e8017de7 updated some tests, added test for shrink, not working yet 2023-03-25 18:37:06 +05:30
7108fa2a56 resolved issues with tox 2023-03-25 11:41:26 +05:30
6cf56ddf11 committing pending changes 2023-03-25 11:18:25 +05:30
3a5ca91234 documentation of read_csv 2023-01-01 18:01:57 +05:30
db8f73d5c6 sortino tests are now passing 2022-09-11 15:13:16 +05:30
7b65f6ff3f find closest date returns nan for min/max failure as well 2022-09-11 15:13:06 +05:30
18b60bd608 sortino filters out nan in rolling returns 2022-09-11 15:12:37 +05:30
0fec9abac0 handled nan in calculate returns and SD 2022-09-11 15:10:12 +05:30
c35bc35529 changed find_closest_date input from dict to TimeSeries 2022-07-24 08:52:30 +05:30
c4e1d8b586 corrected return calc test, writing sortino tests 2022-07-24 08:48:03 +05:30
db8377f0ef added preprocess fromto date, does not work properly yet 2022-07-24 08:47:12 +05:30
583ca98e51 type hints and docs 2022-07-24 08:46:45 +05:30
b1305ca89d Expanded sortino function 2022-07-24 08:46:12 +05:30
ef68ae0293 type hints and docs 2022-07-24 08:45:45 +05:30
cae704b658 renamed file 2022-06-26 21:35:23 +05:30
a69f3e495e added testing guidelines 2022-06-26 21:33:32 +05:30
40429fc70a deleted local test files 2022-06-26 20:55:30 +05:30
c7e955f91e deleted files not required 2022-06-26 20:54:11 +05:30
d0c087c3bf deleted files not required 2022-06-26 20:53:17 +05:30
2367d1aef8 ignore build folder and other support files 2022-06-26 20:51:11 +05:30
31abaa4052 changed interval days calculation to math.ceil
This is to avoid distorting round figure years
2022-06-26 20:40:03 +05:30
d229c9cf2d removed entrypoint 2022-06-25 18:50:25 +05:30
a8b90182da removed unused libraries 2022-06-25 13:51:05 +05:30
48e47e34a8 ignore intellij idea folder 2022-06-25 13:22:52 +05:30
469c421639 updated tox.ini 2022-06-25 13:20:55 +05:30
3bc7e7b496 black changes 2022-06-25 13:20:44 +05:30
a395f7d98d defined custom covariance function to make it compatible with <3.10
replace statistics.coariance in statistics file
2022-06-25 13:20:25 +05:30
56baf83a77 updated values to match with custom covariance function 2022-06-25 13:19:26 +05:30
8c159062f5 renamed files to prevent testing errors in tox 2022-06-25 13:18:58 +05:30
371b319e9d Expanded with more methods and examples 2022-06-12 21:36:26 +05:30
a0499ca157 bug fix in frequency parsing 2022-06-12 21:35:55 +05:30
33c56d8f6c bug fix in sortino 2022-06-12 21:35:37 +05:30
e450395ad0 tests for sortino and missing frequency 2022-06-12 21:35:13 +05:30
3ffec7b11b handled issues with frequency validation 2022-06-11 17:56:42 +05:30
6c8800bef2 Implemented sortino ratio 2022-06-06 22:10:16 +05:30
f46ebaa8a9 Renamed test file 2022-06-06 21:58:07 +05:30
e9bb795ecf Tests for beta and bug fixes 2022-06-06 08:33:58 +05:30
569f20709b Updated test file 2022-06-06 08:17:12 +05:30
c713e3283b improved Readme 2022-06-05 23:12:04 +05:30
0bf1deac48 renamed module to PyFacts 2022-06-05 23:06:12 +05:30
c605f71f10 Frequency validation and automatic assignment during creation
modified tests to account for the changes.
2022-06-05 12:53:40 +05:30
a6fcd29a34 Added to_dict and to_list methods 2022-06-04 22:48:31 +05:30
8117986742 Added correlation function 2022-06-04 22:33:09 +05:30
da2993ebf0 added documentation for Jensen's alpha 2022-06-04 21:32:51 +05:30
f41b9c7519 Added Jensen's alpha to statistics
Also improved doc for beta
2022-06-04 21:30:34 +05:30
7504c840eb documentation 2022-06-04 15:35:16 +05:30
1682fe12cc Completed beta function 2022-05-31 21:18:55 +05:30
177e3bc4c8 implemented beta, yet to check edge cases 2022-05-29 17:56:00 +05:30
922fe0f027 more tests for transform 2022-05-25 10:02:25 +05:30
38fb9ca7d0 tests fro transform method 2022-05-24 21:11:46 +05:30
0a113fdd8a completed transform function 2022-05-24 21:11:34 +05:30
9a71cdf355 transform testing 2022-05-24 21:11:15 +05:30
66ad448516 Added proper support for & in Series 2022-05-24 12:50:05 +05:30
49cebecb88 __and__, __or__ in Series 2022-05-22 17:39:42 +05:30
da0bfcbcb1 Merge branch 'master' of http://192.168.0.114:3000/buddy/fincal 2022-05-22 14:37:04 +05:30
cad069d351 test files 2022-05-22 14:35:07 +05:30
130f4e58e9 started working on transform with aggregation 2022-05-18 21:39:57 -07:00
2ca6167c8b fixed issue with create_date_series
Caused year to go -1 because it was not getting replaced
2022-05-16 22:30:08 +05:30
95e9bfd51c More Sharpe tests 2022-05-16 22:29:18 +05:30
5512a647ad made eomonth parsing more intelligent
Corrected tests and code to account for the same
2022-05-12 10:40:47 +05:30
7e524ccf7a incorporated eomonth in ffill and bfill
More Sharpe tests and ffill/bfill tests
2022-05-10 09:51:12 +05:30
aea6bf9b57 Some tests for Sharpe ratio
Also some corrections based identified during testing
2022-05-08 21:04:48 +05:30
68d854cb3f moved create_test_data to a fixture in conftest.py 2022-05-08 18:09:39 +05:30
0d0b2121a3 Sharpe ratio is working 2022-05-07 14:09:21 +05:30
2a8f5b4041 limit = None in ffill was causing failure 2022-05-01 13:07:19 +05:30
19523519ee Handled returns and SD both getting annualised in SD calc 2022-05-01 13:03:16 +05:30
41562f7e70 returns are not compounded during voloatility calculations 2022-05-01 13:03:00 +05:30
3189e50bd8 removed kwargs and added all arguments 2022-04-30 12:48:31 +05:30
336cf41ca8 added sharpe ratio 2022-04-29 07:43:06 +05:30
0f002f3478 added mean() method 2022-04-29 07:42:39 +05:30
79cd44d41f Implemented limit parameter in ffill and bfill 2022-04-26 10:32:42 +05:30
978566e0a8 Improved TSC.get 2022-04-24 23:47:27 +05:30
c99ffe02d0 changes to getitem to fetch closest date 2022-04-24 18:43:06 +05:30
65f2e8434c More arithmatic tests 2022-04-12 22:40:06 +05:30
e8be7e9efa Math tests and series dtype param name change 2022-04-12 11:43:52 +05:30
49604a5ae9 Series parameter name change 2022-04-12 11:43:11 +05:30
b38a317b82 Added rmath dunder, create Series without specifying dtype
Added Math validator to Series, math dunders pending
2022-04-12 11:42:51 +05:30
03a8045400 implemented arithmatic dunder methods
Writing tests for the same is pending
2022-04-11 22:49:41 +05:30
625c9228e9 Docs and corrections based on tests 2022-04-11 22:19:29 +05:30
3ec5b06e83 Added tests for TSC comparisons 2022-04-11 22:19:17 +05:30
e8dbc16157 implemented comparison in TSC, improved comparisons in Series 2022-04-11 10:47:12 +05:30
b246709603 expanded comparison for series, implemented gt in TSC 2022-04-10 23:52:53 +05:30
09365c7957 added tests for setitem in TSC 2022-04-10 23:51:56 +05:30
7bcd310652 Remove test check if instance is of type Mapping 2022-04-10 14:10:18 +05:30
2ee8f928af preprocess_timeseries converts numbers to float 2022-04-10 14:09:51 +05:30
d4bbaf7903 TimeSeriesCore no longer subclasses UserDict 2022-04-10 14:09:24 +05:30
83a8b05802 implemented setitem 2022-04-10 13:27:25 +05:30
123cc0b204 Added shrink method, documented expand 2022-04-09 22:56:53 +05:30
6547a5e64a added test for sync 2022-04-09 11:58:35 +05:30
22c295ece1 imported in init for easier imports 2022-04-08 22:19:17 +05:30
30749382cf Improved documentation 2022-04-08 10:49:59 +05:30
88e7b3994b implemented sync method 2022-04-07 23:44:18 +05:30
8e8146a07d changed init to super().__init__
getting started with setitem and sync
2022-04-05 23:43:03 +05:30
ce6326f4b5 Refactored with improved type hints 2022-04-05 10:43:53 +05:30
e06626dbca Added more ffill and bfill tests 2022-04-05 10:43:31 +05:30
33feb62015 corrected issue with ffill test 2022-04-04 23:48:06 +05:30
ed973cc259 improved expand function, handled issue with ffill
getitem with ffill was causing date-value pair to be treated as value in ffill
2022-04-04 23:47:34 +05:30
b5aa5d22d4 Added tests for expand 2022-04-04 00:48:55 +05:30
03ccbe0cb1 Added skip weekend parameters to expand and bfill/ffill 2022-04-04 00:48:48 +05:30
fa2ab84c92 renamed test_fincal2 to test_fincal 2022-04-03 15:27:55 +05:30
6ffa52f84e migrated tests from fincal to fincal2 2022-04-03 15:27:07 +05:30
d88acc5888 added read csv function 2022-04-02 07:45:59 +05:30
eb63766c1e Wrote tests for utils.py 2022-03-30 23:06:45 +05:30
faf0b44e46 Added tests for get and more tests for getitem 2022-03-29 10:35:41 +05:30
875089349e Added typehints for head and tail 2022-03-29 10:35:20 +05:30
3b96d231f4 fincal.expand now works by using .get and closest=previous 2022-03-25 08:43:28 +05:30
40 changed files with 4018 additions and 14335 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

4
.gitignore vendored
View File

@ -4,3 +4,7 @@
*egg-info
__pycache__
.vscode
.idea
build
.coverage
.DS_Store

View File

@ -1,15 +0,0 @@
# Fincal
This module simplified handling of time-series data
## The problem
Time series data often have missing data points. These missing points mess things up when you are trying to do a comparison between two sections of a time series.
To make things worse, most libraries don't allow comparison based on dates. Month to Month and year to year comparisons become difficult as they cannot be translated into number of days. However, these are commonly used metrics while looking at financial data.
## The Solution
Fincal aims to simplify things by allowing you to:
* Compare time-series data based on dates
* Easy way to work around missing dates by taking the closest data points
* Completing series with missing data points using forward fill and backward fill
## Examples

View File

@ -1,129 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "3f7938c0-98e3-43b8-86e8-4f000cda7ce5",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"import pandas as pd\n",
"\n",
"from fincal.fincal import TimeSeries\n",
"from fincal.core import Series"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "757eafc2-f804-4e7e-a3b8-2d09cd62e646",
"metadata": {},
"outputs": [],
"source": [
"dfd = pd.read_csv('test_files/nav_history_daily - copy.csv')"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "59b3d4a9-8ef4-4652-9e20-1bac69ab4ff9",
"metadata": {},
"outputs": [],
"source": [
"dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "4bc95ae0-8c33-4eab-acf9-e765d22979b8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: The input data contains duplicate dates which have been ignored.\n"
]
}
],
"source": [
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "f2c3218c-3984-43d6-8638-41a74a9d0b58",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n",
"\t (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n",
"\t (datetime.datetime(2013, 1, 4, 0, 0), 19.008)\n",
"\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "getattr(): attribute name must be string",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m<timed eval>:1\u001b[0m, in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n",
"File \u001b[1;32mD:\\Documents\\Projects\\fincal\\fincal\\fincal.py:203\u001b[0m, in \u001b[0;36mTimeSeries.calculate_rolling_returns\u001b[1;34m(self, from_date, to_date, frequency, as_on_match, prior_match, closest, compounding, years)\u001b[0m\n\u001b[0;32m 200\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n\u001b[0;32m 201\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid argument for frequency \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfrequency\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 203\u001b[0m dates \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_date_series\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfrom_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mto_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 204\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m frequency \u001b[38;5;241m==\u001b[39m AllFrequencies\u001b[38;5;241m.\u001b[39mD:\n\u001b[0;32m 205\u001b[0m dates \u001b[38;5;241m=\u001b[39m [i \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m dates \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtime_series]\n",
"File \u001b[1;32mD:\\Documents\\Projects\\fincal\\fincal\\fincal.py:16\u001b[0m, in \u001b[0;36mcreate_date_series\u001b[1;34m(start_date, end_date, frequency, eomonth)\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate_date_series\u001b[39m(\n\u001b[0;32m 12\u001b[0m start_date: datetime\u001b[38;5;241m.\u001b[39mdatetime, end_date: datetime\u001b[38;5;241m.\u001b[39mdatetime, frequency: \u001b[38;5;28mstr\u001b[39m, eomonth: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 13\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[datetime\u001b[38;5;241m.\u001b[39mdatetime]:\n\u001b[0;32m 14\u001b[0m \u001b[38;5;124;03m\"\"\"Creates a date series using a frequency\"\"\"\u001b[39;00m\n\u001b[1;32m---> 16\u001b[0m frequency \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mAllFrequencies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m eomonth \u001b[38;5;129;01mand\u001b[39;00m frequency\u001b[38;5;241m.\u001b[39mdays \u001b[38;5;241m<\u001b[39m AllFrequencies\u001b[38;5;241m.\u001b[39mM\u001b[38;5;241m.\u001b[39mdays:\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meomonth cannot be set to True if frequency is higher than \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mAllFrequencies\u001b[38;5;241m.\u001b[39mM\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[1;31mTypeError\u001b[0m: getattr(): attribute name must be string"
]
}
],
"source": [
"%%time\n",
"ts.calculate_rolling_returns(from_date='2020-01-01', to_date='2021-01-01')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

259
README.md
View File

@ -1,34 +1,253 @@
# Fincal
This module simplified handling of time-series data
# PyFacts
PyFacts stands for Python library for Financial analysis and computations on time series. It is a library which makes it simple to work with time series data.
Most libraries, and languages like SQL, work with rows. Operations are performed by rows and not by dates. For instance, to calculate 1-year rolling returns in SQL, you are forced to use either a lag of 365/252 rows, leading to an approximation, or slow and cumbersome joins. PyFacts solves this by allowing you to work with dates and time intervals. Hence, to calculate 1-year returns, you will be specifying a lag of 1-year and the library will do the grunt work of finding the most appropriate observations to calculate these returns on.
## The problem
Time series data often have missing data points. These missing points mess things up when you are trying to do a comparison between two sections of a time series.
To make things worse, most libraries don't allow comparison based on dates. Month to Month and year to year comparisons become difficult as they cannot be translated into number of days. However, these are commonly used metrics while looking at financial data.
Libraries and languages usually don't allow comparison based on dates. Calculating month-on-month or year-on-year returns is always cumbersome, as users are forced to rely on row lags. However, data always has inconsistencies, especially financial data. Markets don't work on weekends, there are off days, data doesn't get released on a few days a year, and data availability is patchy when dealing with 40-year-old data. All these problems are exacerbated when you are forced to make calculations using lag.
## The Solution
Fincal aims to simplify things by allowing you to:
* Compare time-series data based on dates
* Easy way to work around missing dates by taking the closest data points
* Completing series with missing data points using forward fill and backward fill
## Examples
PyFacts aims to simplify things by allowing you to:
- Compare time-series data based on dates and time-period-based lag
- Easy way to work around missing dates by taking the closest data points
- Completing series with missing data points using forward fill and backward fill
- Use friendly dates everywhere written as a simple string
## Creating a time series
Time series data can be created from a dictionary, a list of lists/tuples/dicts, or by reading a csv file.
Example:
```
>>> import pyfacts as pft
>>> time_series_data = [
... ('2021-01-01', 10),
... ('2021-02-01', 12),
... ('2021-03-01', 14),
... ('2021-04-01', 16),
... ('2021-05-01', 18),
... ('2021-06-01', 20)
...]
>>> ts = pft.TimeSeries(time_series_data)
```
### Sample usage
```
>>> ts.calculate_returns(as_on='2021-04-01', return_period_unit='months', return_period_value=3, annual_compounded_returns=False)
(datetime.datetime(2021, 4, 1, 0, 0), 0.6)
>>> ts.calculate_returns(as_on='2021-04-15', return_period_unit='months', return_period_value=3, annual_compounded_returns=False)
(datetime.datetime(2021, 4, 1, 0, 0), 0.6)
```
### Working with dates
With PyFacts, you never have to go through the hassle of creating datetime objects for your time series. PyFacts will parse any date passed to it as a string. The default format is ISO format, i.e., YYYY-MM-DD. However, you can use your preferred format simply by specifying it in the options, in a format compatible with the datetime library, after importing the library. For example, to use DD-MM-YYYY format:
```
>>> import pyfacts as pft
>>> pft.PyfactsOptions.date_format = '%d-%m-%Y'
```
Now the library will automatically parse all dates as DD-MM-YYYY.
If you ever need a different format for a single call, all methods accept a date_format parameter to override the default.
### Working with multiple time series
While working with time series data, you will often need to perform calculations on the data. PyFacts supports all kinds of mathematical operations on time series.
Example:
```
>>> import pyfacts as pft
>>> time_series_data = [
... ('2021-01-01', 10),
... ('2021-02-01', 12),
... ('2021-03-01', 14),
... ('2021-04-01', 16),
... ('2021-05-01', 18),
... ('2021-06-01', 20)
...]
>>> ts = pft.TimeSeries(time_series_data)
>>> print(ts/100)
TimeSeries([(datetime.datetime(2021, 1, 1, 0, 0), 0.1),
(datetime.datetime(2021, 2, 1, 0, 0), 0.12),
(datetime.datetime(2021, 3, 1, 0, 0), 0.14),
(datetime.datetime(2021, 4, 1, 0, 0), 0.16),
(datetime.datetime(2021, 5, 1, 0, 0), 0.18),
(datetime.datetime(2021, 6, 1, 0, 0), 0.2)], frequency='M')
```
Mathematical operations can also be done between time series as long as they have the same dates.
Example:
```
>>> import pyfacts as pft
>>> time_series_data = [
... ('2021-01-01', 10),
... ('2021-02-01', 12),
... ('2021-03-01', 14),
... ('2021-04-01', 16),
... ('2021-05-01', 18),
... ('2021-06-01', 20)
...]
>>> ts = pft.TimeSeries(time_series_data)
>>> ts2 = pft.TimeSeries(time_series_data)
>>> print(ts/ts2)
TimeSeries([(datetime.datetime(2021, 1, 1, 0, 0), 1.0),
(datetime.datetime(2021, 2, 1, 0, 0), 1.0),
(datetime.datetime(2021, 3, 1, 0, 0), 1.0),
(datetime.datetime(2021, 4, 1, 0, 0), 1.0),
(datetime.datetime(2021, 5, 1, 0, 0), 1.0),
(datetime.datetime(2021, 6, 1, 0, 0), 1.0)], frequency='M')
```
However, if the dates are not in sync, PyFacts provides convenience methods for synchronising dates.
Example:
```
>>> import pyfacts as pft
>>> data1 = [
... ('2021-01-01', 10),
... ('2021-02-01', 12),
... ('2021-03-01', 14),
... ('2021-04-01', 16),
... ('2021-05-01', 18),
... ('2021-06-01', 20)
...]
>>> data2 = [
... ("2022-15-01", 20),
... ("2022-15-02", 22),
... ("2022-15-03", 24),
... ("2022-15-04", 26),
... ("2022-15-06", 28),
... ("2022-15-07", 30)
...]
>>> ts = pft.TimeSeries(data1, frequency='M', date_format='%Y-%d-%m')
>>> ts2 = pft.TimeSeries(data2, frequency='M', date_format='%Y-%d-%m')
>>> ts.sync(ts2, fill_method='bfill') # Sync ts2 with ts
TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 20.0),
(datetime.datetime(2022, 2, 1, 0, 0), 22.0),
(datetime.datetime(2022, 3, 1, 0, 0), 24.0),
(datetime.datetime(2022, 4, 1, 0, 0), 26.0),
(datetime.datetime(2022, 6, 1, 0, 0), 28.0),
(datetime.datetime(2022, 7, 1, 0, 0), 30.0)], frequency='M')
```
Even if you need to perform calculations on data with different frequencies, PyFacts will let you easily handle this with the expand and shrink methods.
Example:
```
>>> data = [
... ("2022-01-01", 10),
... ("2022-02-01", 12),
... ("2022-03-01", 14),
... ("2022-04-01", 16),
... ("2022-05-01", 18),
... ("2022-06-01", 20)
...]
>>> ts = pft.TimeSeries(data, 'M')
>>> ts.expand(to_frequency='W', method='ffill')
TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 10.0),
(datetime.datetime(2022, 1, 8, 0, 0), 10.0),
(datetime.datetime(2022, 1, 15, 0, 0), 10.0)
...
(datetime.datetime(2022, 5, 14, 0, 0), 18.0),
(datetime.datetime(2022, 5, 21, 0, 0), 18.0),
(datetime.datetime(2022, 5, 28, 0, 0), 18.0)], frequency='W')
>>> ts.shrink(to_frequency='Q', method='ffill')
TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 10.0),
(datetime.datetime(2022, 4, 1, 0, 0), 16.0)], frequency='Q')
```
If you want to shorten the timeframe of the data with an aggregation function, the transform method will help you out. Currently it supports sum and mean.
Example:
```
>>> data = [
... ("2022-01-01", 10),
... ("2022-02-01", 12),
... ("2022-03-01", 14),
... ("2022-04-01", 16),
... ("2022-05-01", 18),
... ("2022-06-01", 20),
... ("2022-07-01", 22),
... ("2022-08-01", 24),
... ("2022-09-01", 26),
... ("2022-10-01", 28),
... ("2022-11-01", 30),
... ("2022-12-01", 32)
...]
>>> ts = pft.TimeSeries(data, 'M')
>>> ts.transform(to_frequency='Q', method='sum')
TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 36.0),
(datetime.datetime(2022, 4, 1, 0, 0), 54.0),
(datetime.datetime(2022, 7, 1, 0, 0), 72.0),
(datetime.datetime(2022, 10, 1, 0, 0), 90.0)], frequency='Q')
>>> ts.transform(to_frequency='Q', method='mean')
TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 12.0),
(datetime.datetime(2022, 4, 1, 0, 0), 18.0),
(datetime.datetime(2022, 7, 1, 0, 0), 24.0),
(datetime.datetime(2022, 10, 1, 0, 0), 30.0)], frequency='Q')
```
## To-do
### Core features
- [ ] Add __setitem__
- [x] Add `__setitem__`
- [ ] Create empty TimeSeries object
- [ ] Read from CSV
- [x] Read from CSV
- [ ] Write to CSV
- [ ] Convert to dict
- [ ] Convert to list of dicts
### Fincal features
- [ ] Sync two TimeSeries
- [ ] Average rolling return
- [ ] Sharpe ratio
- [ ] Jensen's Alpha
- [ ] Beta
- [ ] Max drawdown
- [x] Convert to dict
- [x] Convert to list of tuples
### pyfacts features
- [x] Sync two TimeSeries
- [x] Average rolling return
- [x] Sharpe ratio
- [x] Jensen's Alpha
- [x] Beta
- [x] Sortino ratio
- [x] Correlation & R-squared
- [ ] Treynor ratio
- [x] Max drawdown
- [ ] Moving average
### Pending implementation
- [x] Use limit parameter in ffill and bfill
- [x] Implementation of ffill and bfill may be incorrect inside expand, check and correct
- [ ] Implement interpolation in expand

View File

@ -1,29 +0,0 @@
import pandas
from fincal.fincal import TimeSeries
# Scratch script: loads NAV history fixtures, builds a daily TimeSeries and
# pokes at dict iteration order. Requires the CSV fixtures under test_files/.
dfd = pandas.read_csv('test_files/nav_history_daily - Copy.csv')
dfm = pandas.read_csv('test_files/nav_history_monthly.csv')

# Daily (date, nav) pairs for a single scheme, in ascending order
data_d = sorted((row.date, row.nav) for row in dfd.itertuples() if row.amfi_code == 118825)

# Monthly data as a list of {'date', 'value'} records
data_m = [{'date': row.date, 'value': row.nav} for row in dfm.itertuples()]

tsd = TimeSeries(data_d, frequency='D')

md = dict(data_d)

# Print the first four keys in insertion order
for shown, key in enumerate(md, start=1):
    print(key)
    if shown >= 4:
        break

print('\n')

# Print the last four keys, newest first
for shown, key in enumerate(reversed(md), start=1):
    print('rev', key)
    if shown >= 4:
        break

# NOTE(review): next() is applied to each key, not to an iterator. The keys
# look like plain values read from the CSV, so this presumably raises
# TypeError - confirm the intent before relying on this line.
x = [next(key) for key in md]
print(x)

View File

@ -1 +0,0 @@
from fincal import *

View File

@ -1,20 +0,0 @@
import sys
def main(args=None):
    """Placeholder entry point that echoes how the script was invoked.

    Parameters
    ----------
    args : list, optional
        Command-line arguments. Falls back to ``sys.argv[1:]`` when omitted.
        The value is not used yet; the printed details come from ``sys.argv``.
    """
    if args is None:
        args = sys.argv[1:]

    for banner in ("This is the main routine.", "It should do something interesting."):
        print(banner)

    argv = sys.argv
    print("This is the name of the script: ", argv[0])
    print("Number of arguments: ", len(argv))
    print("The arguments are: ", str(argv))

    # Do argument parsing here with argparse


if __name__ == "__main__":
    main()

View File

@ -1,437 +0,0 @@
import datetime
import inspect
from collections import UserDict, UserList
from dataclasses import dataclass
from numbers import Number
from typing import Iterable, List, Literal, Mapping, Sequence, Union
from .utils import FincalOptions, _parse_date, _preprocess_timeseries
@dataclass(frozen=True)
class Frequency:
    """Immutable description of a time-series frequency."""

    # Human-readable label, e.g. "daily"
    name: str
    # Unit the frequency is expressed in, e.g. "days", "months" or "years"
    freq_type: str
    # Number of `freq_type` units in one period
    value: int
    # Length of one period expressed in days
    days: int
    # Short code for the frequency, e.g. "D"
    symbol: str
def date_parser(*pos):
    """Decorator to parse dates in any function

    Accepts the 0-indexed positions (in the decorated function's signature) of the
    parameters for which date parsing needs to be done.
    Works even if the function is called with keyword arguments that do not follow
    the parameter order.

    If the call passes a ``date_format`` keyword argument, it is forwarded to the
    date parser. Arguments that are missing or ``None`` are left untouched.

    Example:
    --------
    >>> @date_parser(0, 1)
    ... def calculate_difference(date1, date2, return_type='int'):
    ...     diff = date2 - date1
    ...     if return_type == 'int':
    ...         return diff.days
    ...     return diff
    ...
    >>> calculate_difference(date1='2019-01-01', date2='2020-01-01')
    365

    Each of the dates is automatically parsed from string before the function runs.
    """
    from functools import wraps

    def parse_dates(func):
        # The signature cannot change after decoration, so resolve the
        # parameter names once instead of on every call.
        params = list(inspect.signature(func).parameters)

        @wraps(func)  # keep the wrapped function's __name__, __doc__ and signature
        def wrapper_func(*args, **kwargs):
            date_format = kwargs.get("date_format", None)
            args = list(args)

            for j in pos:
                kwarg = params[j]
                date = kwargs.get(kwarg, None)
                in_args = False
                if date is None:
                    # Not given by keyword; fall back to the positional slot
                    try:
                        date = args[j]
                    except IndexError:
                        pass
                    in_args = True

                if date is None:
                    continue

                parsed_date = _parse_date(date, date_format)
                if not in_args:
                    kwargs[kwarg] = parsed_date
                else:
                    args[j] = parsed_date
            return func(*args, **kwargs)

        return wrapper_func

    return parse_dates
class AllFrequencies:
    """Namespace of the supported frequencies, keyed by their symbol."""

    D = Frequency(name="daily", freq_type="days", value=1, days=1, symbol="D")
    W = Frequency(name="weekly", freq_type="days", value=7, days=7, symbol="W")
    M = Frequency(name="monthly", freq_type="months", value=1, days=30, symbol="M")
    Q = Frequency(name="quarterly", freq_type="months", value=3, days=91, symbol="Q")
    H = Frequency(name="half-yearly", freq_type="months", value=6, days=182, symbol="H")
    Y = Frequency(name="annual", freq_type="years", value=1, days=365, symbol="Y")
class _IndexSlicer:
"""Class to create a slice using iloc in TimeSeriesCore"""
def __init__(self, parent_obj: object):
self.parent = parent_obj
def __getitem__(self, n):
if isinstance(n, int):
keys = [self.parent.dates[n]]
else:
keys = self.parent.dates[n]
item = [(key, self.parent.data[key]) for key in keys]
if len(item) == 1:
return item[0]
return self.parent.__class__(item, self.parent.frequency.symbol)
class Series(UserList):
    """Container for a series of objects, all objects must be of the same type

    Comparison operators work element-wise against a single value and return
    a boolean Series of the same length.
    """

    def __init__(
        self,
        data,
        data_type: Literal["date", "number", "bool"],
        date_format: str = None,
    ):
        """Create a Series, converting every element to the requested type.

        Parameters
        ----------
        data : Sequence
            The elements of the series.
        data_type : str
            One of the keys of the type table below. "int", "float" and
            "number" are all stored as float.
        date_format : str, optional
            Forwarded to the date parser for date series.

        Raises
        ------
        ValueError
            If data_type is not a supported type name.
        TypeError
            If data is not a Sequence.
        """
        types_dict = {
            "date": datetime.datetime,
            "datetime": datetime.datetime,
            "datetime.datetime": datetime.datetime,
            "float": float,
            "int": float,
            "number": float,
            "bool": bool,
        }

        if data_type not in types_dict:
            raise ValueError("Unsupported value for data type")

        if not isinstance(data, Sequence):
            raise TypeError("Series object can only be created using Sequence types")

        target_type = types_dict[data_type]
        if target_type is datetime.datetime:
            data = [_parse_date(i, date_format) for i in data]
        else:
            data = [target_type(i) for i in data]

        self.dtype = target_type
        self.data = data

    def __repr__(self):
        return f"{self.__class__.__name__}({self.data}, data_type='{self.dtype.__name__}')"

    def __getitem__(self, i):
        # Slices stay a Series of the same dtype; a single index returns the raw element
        if isinstance(i, slice):
            return self.__class__(self.data[i], str(self.dtype.__name__))
        else:
            return self.data[i]

    def _compare(self, other, compare, ordering_symbol=None):
        """Apply `compare(element, other)` to every element and return a bool Series.

        Parameters
        ----------
        other : object
            Value to compare against. Strings and date objects are passed
            through the date parser first.
        compare : callable
            Two-argument function implementing the comparison.
        ordering_symbol : str, optional
            Operator symbol for ordering comparisons, which are rejected on
            boolean series. Leave as None for equality.

        Raises
        ------
        TypeError
            If an ordering comparison is attempted on a boolean series, or if
            `other` is not comparable with the series' dtype.
        """
        if ordering_symbol is not None and self.dtype == bool:
            raise TypeError(f"{ordering_symbol} not supported for boolean series")

        if isinstance(other, (str, datetime.datetime, datetime.date)):
            other = _parse_date(other)

        # Numeric series accept any Number; otherwise the value must match the dtype
        if (self.dtype == float and isinstance(other, Number)) or isinstance(other, self.dtype):
            return Series([compare(i, other) for i in self.data], "bool")

        # TypeError is still caught by callers that handle the previous bare Exception
        raise TypeError(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")

    def __gt__(self, other):
        return self._compare(other, lambda a, b: a > b, ">")

    def __ge__(self, other):
        return self._compare(other, lambda a, b: a >= b, ">=")

    def __lt__(self, other):
        return self._compare(other, lambda a, b: a < b, "<")

    def __le__(self, other):
        return self._compare(other, lambda a, b: a <= b, "<=")

    def __eq__(self, other):
        return self._compare(other, lambda a, b: a == b)
class TimeSeriesCore(UserDict):
    """Defines the core building blocks of a TimeSeries object

    Observations are kept in ``self.data``, a dict keyed by date.  The class
    adds date-aware lookup (``ts["2022-01-31"]``, ``ts.get``), boolean/date
    Series masks, positional access through ``iloc`` and compact printing.
    """

    def __init__(
        self, data: List[Iterable], frequency: Literal["D", "W", "M", "Q", "H", "Y"], date_format: str = "%Y-%m-%d"
    ):
        """Instantiate a TimeSeriesCore object

        Parameters
        ----------
        data : List[tuple]
            Time Series data in the form of list of tuples.
            The first element of each tuple should be a date and second element should be a value.
            The input is normalised by _preprocess_timeseries before it is stored.

        frequency : str
            The frequency of the time series, looked up by name on AllFrequencies.
            Valid values are {D, W, M, Q, H, Y}

        date_format : str, optional, default "%Y-%m-%d"
            Specify the format of the date
            Required only if the first argument of tuples is a string. Otherwise ignored.
        """

        data = _preprocess_timeseries(data, date_format=date_format)

        # dict() keeps a single entry per date, so a shorter dict means duplicates were dropped.
        self.data = dict(data)
        if len(self.data) != len(data):
            print("Warning: The input data contains duplicate dates which have been ignored.")
        self.frequency: Frequency = getattr(AllFrequencies, frequency)
        self.iter_num: int = -1
        self._dates: list = None
        self._values: list = None
        self._start_date: datetime.datetime = None
        self._end_date: datetime.datetime = None

    @property
    def dates(self) -> Series:
        """Get a list of all the dates in the TimeSeries object"""

        # The cached key list is rebuilt only when its length no longer matches the data.
        if self._dates is None or len(self._dates) != len(self.data):
            self._dates = list(self.data.keys())

        return Series(self._dates, "date")

    @property
    def values(self) -> Series:
        """Get a list of all the Values in the TimeSeries object"""

        # Same length-based cache refresh as for dates.
        if self._values is None or len(self._values) != len(self.data):
            self._values = list(self.data.values())

        return Series(self._values, "number")

    @property
    def start_date(self) -> datetime.datetime:
        """The first date in the TimeSeries object"""

        return self.dates[0]

    @property
    def end_date(self) -> datetime.datetime:
        """The last date in the TimeSeries object"""

        return self.dates[-1]

    def _get_printable_slice(self, n: int):
        """Helper function for __repr__ and __str__

        Returns a dict with keys "start" and "end", each holding the string form
        of n // 2 (date, value) pairs taken from the beginning and the end of the data.
        """

        printable = {}
        iter_f = iter(self.data)
        first_n = [next(iter_f) for i in range(n // 2)]

        iter_b = reversed(self.data)
        last_n = [next(iter_b) for i in range(n // 2)]
        # Keys were collected back to front; sort them for display.
        last_n.sort()

        printable["start"] = [str((i, self.data[i])) for i in first_n]
        printable["end"] = [str((i, self.data[i])) for i in last_n]
        return printable

    def __repr__(self):
        """Constructor-like representation; series longer than 6 items are abbreviated."""
        if len(self.data) > 6:
            printable = self._get_printable_slice(6)
            printable_str = "{}([{}\n\t ...\n\t {}], frequency={})".format(
                self.__class__.__name__,
                ",\n\t ".join(printable["start"]),
                ",\n\t ".join(printable["end"]),
                repr(self.frequency.symbol),
            )
        else:
            printable_str = "{}([{}], frequency={})".format(
                self.__class__.__name__,
                ",\n\t".join([str(i) for i in self.data.items()]),
                repr(self.frequency.symbol),
            )
        return printable_str

    def __str__(self):
        """List-like representation; series longer than 6 items are abbreviated."""
        if len(self.data) > 6:
            printable = self._get_printable_slice(6)
            printable_str = "[{}\n ...\n {}]".format(
                ",\n ".join(printable["start"]),
                ",\n ".join(printable["end"]),
            )
        else:
            printable_str = "[{}]".format(",\n ".join([str(i) for i in self.data.items()]))
        return printable_str

    @date_parser(1)
    def _get_item_from_date(self, date: Union[str, datetime.datetime]):
        """Return the (date, value) pair stored for date; raises KeyError if it is absent."""
        return date, self.data[date]

    def _get_item_from_key(self, key: Union[str, datetime.datetime]):
        """Resolve a single key: "dates"/"values" return those properties, anything else is a date lookup.

        Integer keys are rejected with a KeyError that points the caller to .iloc.
        """
        if isinstance(key, int):
            raise KeyError(f"{key}. \nHint: use .iloc[{key}] for index based slicing.")

        if key in ["dates", "values"]:
            return getattr(self, key)

        return self._get_item_from_date(key)

    def _get_item_from_list(self, date_list: Sequence[Union[str, datetime.datetime]]):
        """Build a new object of the same class from the items found for each key in date_list."""
        data_to_return = [self._get_item_from_key(key) for key in date_list]
        return self.__class__(data_to_return, frequency=self.frequency.symbol)

    def _get_item_from_series(self, series: Series):
        """Select items using either a boolean mask Series or a Series of dates.

        Raises
        ------
        ValueError
            If a boolean Series does not have the same length as the object.
        TypeError
            If the Series is neither boolean nor datetime typed.
        """
        if series.dtype == bool:
            if len(series) != len(self.dates):
                raise ValueError(f"Length of Series: {len(series)} did not match length of object: {len(self.dates)}")
            dates_to_return = [self.dates[i] for i, j in enumerate(series) if j]
        elif series.dtype == datetime.datetime:
            dates_to_return = list(series)
        else:
            raise TypeError(f"Cannot slice {self.__class__.__name__} using a Series of {series.dtype.__name__}")

        return self._get_item_from_list(dates_to_return)

    def __getitem__(self, key):
        """Dispatch on the key type: scalar key, Series mask/dates, or a sequence of keys."""
        if isinstance(key, (int, str, datetime.datetime, datetime.date)):
            return self._get_item_from_key(key)

        if isinstance(key, Series):
            return self._get_item_from_series(key)

        if isinstance(key, Sequence):
            return self._get_item_from_list(key)

        raise TypeError(f"Invalid type {repr(type(key).__name__)} for slicing.")

    def __iter__(self):
        """Restart iteration; the position is stored on the object itself (``self.n``)."""
        self.n = 0
        return self

    def __next__(self):
        """Return the next (date, value) pair."""
        if self.n >= len(self.dates):
            raise StopIteration
        else:
            key = self.dates[self.n]
            self.n += 1
            return key, self.data[key]

    @date_parser(1)
    def __contains__(self, key: object) -> bool:
        """Membership test on dates; the key goes through date_parser first."""
        return super().__contains__(key)

    @date_parser(1)
    def get(self, date: Union[str, datetime.datetime], default=None, closest=None):
        """Return the (date, value) pair for a date, optionally falling back to a neighbouring date.

        Parameters
        ----------
        date : str | datetime.datetime
            The date to look up.

        default : optional
            Returned when no matching date is found.

        closest : exact | previous | next, optional
            * exact: only the given date is accepted
            * previous: step back one day at a time until a stored date is found
            * next: step forward one day at a time until a stored date is found
            If not provided, FincalOptions.get_closest is used.

        Raises
        ------
        ValueError
            If closest is not one of the values above.
        """

        if closest is None:
            closest = FincalOptions.get_closest

        if closest == "exact":
            try:
                item = self._get_item_from_date(date)
                return item
            except KeyError:
                return default

        if closest == "previous":
            delta = datetime.timedelta(-1)
        elif closest == "next":
            delta = datetime.timedelta(1)
        else:
            raise ValueError(f"Invalid argument from closest {closest!r}")

        if not self.data:
            return default

        # Once the search has moved past the outermost stored date in its direction of
        # travel nothing can match any more, so stop there and fall back to `default`
        # instead of stepping on until datetime arithmetic overflows.
        searching_back = delta.days < 0
        first_date, last_date = min(self.data), max(self.data)
        while (date >= first_date) if searching_back else (date <= last_date):
            try:
                return self._get_item_from_date(date)
            except KeyError:
                date += delta
        return default

    @property
    def iloc(self) -> Mapping:
        """Returns an item or a set of items based on index

            supports slicing using numerical index.
            Accepts integers or Python slice objects

        Usage
        -----
        >>> ts = TimeSeries(data, frequency='D')
        >>> ts.iloc[0]      # get the first value
        >>> ts.iloc[-1]     # get the last value
        >>> ts.iloc[:3]     # get the first 3 values
        >>> ts.iloc[-3:]    # get the last 3 values
        >>> ts.iloc[5:10]   # get five values starting from the fifth value
        >>> ts.iloc[::2]    # get every alternate date
        """

        return _IndexSlicer(self)

    def head(self, n: int = 6):
        """Returns the first n items of the TimeSeries object"""

        return self.iloc[:n]

    def tail(self, n: int = 6):
        """Returns the last n items of the TimeSeries object"""

        return self.iloc[-n:]

    def items(self):
        """Return the (date, value) items view of the underlying dict."""
        return self.data.items()

View File

@ -1,124 +0,0 @@
import datetime
from dataclasses import dataclass
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union
from .exceptions import DateNotFoundError, DateOutOfRangeError
@dataclass
class FincalOptions:
    """Library-wide default settings.

    The visible code reads these as class attributes
    (e.g. ``FincalOptions.date_format``) rather than from an instance.
    """

    # Default strptime format used by _parse_date when no date_format is passed.
    date_format: str = "%Y-%m-%d"
    # NOTE(review): not referenced in the code visible here; the trailing
    # "after" presumably names the other accepted value - confirm.
    closest: str = "before" # after
    # Fallback for the traded_days argument of TimeSeries.volatility.
    traded_days: int = 365
    # Default match mode used by TimeSeriesCore.get when closest is not given;
    # that method accepts "exact", "previous" or "next".
    get_closest: str = "exact"
def _parse_date(date: str, date_format: str = None):
    """Parses date and handles errors

    Parameters
    ----------
    date : str | datetime.date | datetime.datetime
        The value to convert. date/datetime objects are returned as a
        datetime for the same calendar day with the time part removed.

    date_format : str, optional
        strptime format for string input.
        If not provided, FincalOptions.date_format is used.

    Returns
    -------
    datetime.datetime

    Raises
    ------
    ValueError
        If the input is not a string (and not a date object), or if the
        string does not match the format.
    """
    if isinstance(date, (datetime.datetime, datetime.date)):
        # Going through the ordinal keeps the day and drops the time of day.
        return datetime.datetime.fromordinal(date.toordinal())

    fmt = FincalOptions.date_format if date_format is None else date_format

    try:
        return datetime.datetime.strptime(date, fmt)
    except TypeError:
        raise ValueError("Date does not seem to be valid date-like string")
    except ValueError:
        raise ValueError("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?")
def _preprocess_timeseries(
    data: Union[
        Sequence[Iterable[Union[str, datetime.datetime, float]]],
        Sequence[Mapping[str, Union[float, datetime.datetime]]],
        Sequence[Mapping[Union[str, datetime.datetime], float]],
        Mapping[Union[str, datetime.datetime], float],
    ],
    date_format: str,
) -> List[Tuple[datetime.datetime, float]]:
    """Converts any type of list to the correct type

    Accepted shapes:
    * a mapping of date -> value
    * a sequence of (date, value) pairs
    * a sequence of one-item mappings, each {date: value}
    * a sequence of two-item mappings, whose values are taken in order as (date, value)

    The shape of a sequence is decided by looking at its first element only.

    Returns
    -------
    A sorted list of (datetime, value) tuples, with dates parsed by _parse_date.

    Raises
    ------
    TypeError
        If the data does not have one of the shapes above.
    """
    if isinstance(data, Mapping):
        return _preprocess_timeseries(list(data.items()), date_format)

    if not isinstance(data, Sequence):
        raise TypeError("Could not parse the data")

    sample = data[0]

    if isinstance(sample, Sequence):
        parsed = [(_parse_date(raw_date, date_format), value) for raw_date, value in data]
        parsed.sort()
        return parsed

    if not isinstance(sample, Mapping):
        raise TypeError("Could not parse the data")

    width = len(sample)
    if width == 1:
        # {date: value} -> (date, value)
        pairs = [tuple(*row.items()) for row in data]
    elif width == 2:
        # {"date": ..., "value": ...} -> (date, value); the key names are not inspected
        pairs = [tuple(row.values()) for row in data]
    else:
        raise TypeError("Could not parse the data")

    return _preprocess_timeseries(pairs, date_format)
def _preprocess_match_options(
    as_on_match: str, prior_match: str, closest: str
) -> Tuple[datetime.timedelta, datetime.timedelta]:
    """Checks the arguments and returns appropriate timedelta objects

    Parameters
    ----------
    as_on_match : closest | exact | previous | next
        Match mode for the as-on date. "closest" means: use the value of closest.

    prior_match : closest | exact | previous | next
        Match mode for the prior date. "closest" means: use the value of closest.

    closest : exact | previous | next
        The default match mode.

    Returns
    -------
    (as_on_delta, prior_delta)
        One timedelta per date: 0 days for exact, -1 day for previous, +1 day for next.

    Raises
    ------
    ValueError
        If any of the three arguments is not a recognised mode.
        closest is checked first, then as_on_match, then prior_match.
    """
    day_offsets = {"exact": 0, "previous": -1, "next": 1}

    if closest not in day_offsets:
        raise ValueError(f"Invalid argument for closest: {closest}")

    if as_on_match == "closest":
        as_on_match = closest
    if prior_match == "closest":
        prior_match = closest

    if as_on_match not in day_offsets:
        raise ValueError(f"Invalid as_on_match argument: {as_on_match}")
    if prior_match not in day_offsets:
        raise ValueError(f"Invalid prior_match argument: {prior_match}")

    as_on_delta = datetime.timedelta(days=day_offsets[as_on_match])
    prior_delta = datetime.timedelta(days=day_offsets[prior_match])
    return as_on_delta, prior_delta
def _find_closest_date(
    data: Mapping[datetime.datetime, float],
    date: datetime.datetime,
    limit_days: int,
    delta: datetime.timedelta,
    if_not_found: Literal["fail", "nan"],
):
    """Helper function to find data for the closest available date

    Starting at date, the search moves by delta until a date with a value is
    found, the step budget is used up, or the search leaves the range of data.
    The search is iterative, so long gaps and large limits do not run into
    the interpreter's recursion limit.

    Parameters
    ----------
    data : Mapping
        Mapping of date -> value. A stored value of None is treated as missing.

    date : datetime.datetime
        The date at which the search starts.

    limit_days : int
        Maximum number of steps to take. 0 allows no stepping;
        a negative value never reaches 0 and therefore places no limit.

    delta : datetime.timedelta
        The step applied to date on every miss. A zero delta disables stepping.
        Callers in this file pass whole-day deltas from _preprocess_match_options.

    if_not_found : fail | nan
        What to do when no value was found:
        * fail: raise DateNotFoundError
        * nan: return NaN as the value

    Returns
    -------
    (date, value)
        The date actually matched (or the last date tried, for nan) and its value.

    Raises
    ------
    DateOutOfRangeError
        If the search moves before the first or after the last date in data.
    DateNotFoundError
        If nothing was found and if_not_found is "fail".
    ValueError
        If nothing was found and if_not_found is not a valid option.
    """
    step_days = delta.days

    # The bounds do not change during the search, so they are computed once
    # and only for the direction in which the search can leave the range.
    earliest = min(data) if step_days < 0 else None
    latest = max(data) if step_days > 0 else None

    while True:
        if step_days < 0 and date < earliest:
            raise DateOutOfRangeError(date, "min")
        if step_days > 0 and date > latest:
            raise DateOutOfRangeError(date, "max")

        row = data.get(date, None)
        if row is not None:
            return date, row

        if not delta or limit_days == 0:
            break
        date += delta
        limit_days -= 1

    if if_not_found == "fail":
        raise DateNotFoundError("Data not found for date", date)
    if if_not_found == "nan":
        return date, float("NaN")

    raise ValueError(f"Invalid argument for if_not_found: {if_not_found}")
def _interval_to_years(interval_type: Literal["years", "months", "days"], interval_value: int) -> float:
    """Converts any time period to years for use with compounding functions

    Parameters
    ----------
    interval_type : years | months | days
        The unit in which interval_value is expressed.

    interval_value : int
        The length of the period in that unit.

    Returns
    -------
    float
        The period in years, using 12 months or 365 days to a year.

    Raises
    ------
    KeyError
        If interval_type is not one of the supported units.
    """
    year_conversion_factor = {"years": 1, "months": 12, "days": 365}
    years = interval_value / year_conversion_factor[interval_type]
    return years

26
my_checks.py Normal file
View File

@ -0,0 +1,26 @@
import datetime
import time
import timeit
import pandas
from pyfacts.pyfacts import AllFrequencies, TimeSeries, create_date_series
# Scratch timing script: load the sample NAV files, build one TimeSeries per
# frequency from three different input shapes, then time a single operation.
dfd = pandas.read_csv("test_files/msft.csv")
dfm = pandas.read_csv("test_files/nav_history_monthly.csv")
dfq = pandas.read_csv("test_files/nav_history_quarterly.csv")

# The same kind of data in the three shapes the constructor accepts:
# list of tuples, list of dicts, and a plain dict.
data_d = [(i.date, i.nav) for i in dfd.itertuples()]
data_m = [{"date": i.date, "value": i.nav} for i in dfm.itertuples()]
data_q = {i.date: i.nav for i in dfq.itertuples()}
data_q.update({"14-02-2022": 93.7})

tsd = TimeSeries(data_d, frequency="D")
tsm = TimeSeries(data_m, frequency="M", date_format="%d-%m-%Y")
tsq = TimeSeries(data_q, frequency="Q", date_format="%d-%m-%Y")

# perf_counter is the clock meant for measuring short durations; unlike
# time.time it is not affected by system clock adjustments.
start = time.perf_counter()
# ts.calculate_rolling_returns(datetime.datetime(2015, 1, 1), datetime.datetime(2022, 2, 1), years=1)
bdata = tsq.bfill()
# rr = tsd.calculate_rolling_returns(datetime.datetime(2022, 1, 1), datetime.datetime(2022, 2, 1), years=1)
print(time.perf_counter() - start)

View File

@ -1,26 +0,0 @@
import datetime
import time
import timeit
import pandas
from fincal.fincal import AllFrequencies, TimeSeries, create_date_series
# Scratch timing script: load the sample NAV files, build one TimeSeries per
# frequency from three different input shapes, then time a single operation.
dfd = pandas.read_csv('test_files/msft.csv')
dfm = pandas.read_csv('test_files/nav_history_monthly.csv')
dfq = pandas.read_csv('test_files/nav_history_quarterly.csv')

# The same kind of data in the three shapes the constructor accepts:
# list of tuples, list of dicts, and a plain dict.
data_d = [(i.date, i.nav) for i in dfd.itertuples()]
data_m = [{'date': i.date, 'value': i.nav} for i in dfm.itertuples()]
data_q = {i.date: i.nav for i in dfq.itertuples()}
data_q.update({'14-02-2022': 93.7})

tsd = TimeSeries(data_d, frequency='D')
tsm = TimeSeries(data_m, frequency='M', date_format='%d-%m-%Y')
tsq = TimeSeries(data_q, frequency='Q', date_format='%d-%m-%Y')

# perf_counter is the clock meant for measuring short durations; unlike
# time.time it is not affected by system clock adjustments.
start = time.perf_counter()
# ts.calculate_rolling_returns(datetime.datetime(2015, 1, 1), datetime.datetime(2022, 2, 1), years=1)
bdata = tsq.bfill()
# rr = tsd.calculate_rolling_returns(datetime.datetime(2022, 1, 1), datetime.datetime(2022, 2, 1), years=1)
print(time.perf_counter() - start)

27
pyfacts/__init__.py Normal file
View File

@ -0,0 +1,27 @@
from .core import *
from .pyfacts import *
from .statistics import *
from .utils import *
# Package metadata.
__author__ = "Gourav Kumar"
__email__ = "gouravkr@outlook.in"
__version__ = "0.0.1"

# The package description is assigned to __doc__ explicitly: it follows the
# import statements, so a bare string literal at this position would not be
# treated as the module docstring.
__doc__ = """
PyFacts stands for Python library for Financial analysis and computations on time series.
It is a library which makes it simple to work with time series data.
Most libraries, and languages like SQL, work with rows. Operations are performed by rows
and not by dates. For instance, to calculate 1-year rolling returns in SQL, you are forced
to use either a lag of 365/252 rows, leading to an approximation, or slow and cumbersome
joins. PyFacts solves this by allowing you to work with dates and time intervals. Hence,
to calculate 1-year returns, you will be specifying a lag of 1-year and the library will
do the grunt work of finding the most appropriate observations to calculate these returns on.
PyFacts aims to simplify things by allowing you to:
* Compare time-series data based on dates and time-period-based lag
* Easy way to work around missing dates by taking the closest data points
* Completing series with missing data points using forward fill and backward fill
* Use friendly dates everywhere written as a simple string
"""

1017
pyfacts/core.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -13,9 +13,9 @@ class DateNotFoundError(Exception):
class DateOutOfRangeError(Exception):
"""Exception to be raised when provided date is outside the range of dates in the time series"""
def __init__(self, date: datetime.datetime, type: Literal['min', 'max']) -> None:
if type == 'min':
def __init__(self, date: datetime.datetime, type: Literal["min", "max"]) -> None:
if type == "min":
message = f"Provided date {date} is before the first date in the TimeSeries"
if type == 'max':
if type == "max":
message = f"Provided date {date} is after the last date in the TimeSeries"
super().__init__(message)

View File

@ -1,17 +1,20 @@
from __future__ import annotations
import csv
import datetime
import math
import pathlib
import statistics
from typing import Iterable, List, Literal, Mapping, TypedDict, Union
from typing import Iterable, List, Literal, Mapping, Tuple, TypedDict
from dateutil.relativedelta import relativedelta
from .core import AllFrequencies, Frequency, Series, TimeSeriesCore, date_parser
from .utils import (
FincalOptions,
PyfactsOptions,
_find_closest_date,
_interval_to_years,
_is_eomonth,
_preprocess_match_options,
)
@ -24,12 +27,13 @@ class MaxDrawdown(TypedDict):
@date_parser(0, 1)
def create_date_series(
start_date: Union[str, datetime.datetime],
end_date: Union[str, datetime.datetime],
start_date: str | datetime.datetime,
end_date: str | datetime.datetime,
frequency: Literal["D", "W", "M", "Q", "H", "Y"],
eomonth: bool = False,
skip_weekends: bool = False,
) -> List[datetime.datetime]:
ensure_coverage: bool = False,
) -> Series:
"""Create a date series with a specified frequency
Parameters
@ -50,6 +54,13 @@ def create_date_series(
Specifies whether the dates in the series should be end-of-month dates.
Can only be used if the frequency is Monthly or lower.
skip_weekends: Boolean, default False
If set to True, dates falling on weekends will not be added to the series.
Used only when frequency is daily, weekends will necessarily be included for other frequencies.
ensure_coverage: Boolean, default False
If set to true, it will ensure the last date is greater than the end date.
Returns
-------
List[datetime.datetime]
@ -65,24 +76,30 @@ def create_date_series(
if eomonth and frequency.days < AllFrequencies.M.days:
raise ValueError(f"eomonth cannot be set to True if frequency is higher than {AllFrequencies.M.name}")
datediff = (end_date - start_date).days / frequency.days + 1
dates = []
for i in range(0, int(datediff)):
diff = {frequency.freq_type: frequency.value * i}
counter = 0
while counter < 100000:
diff = {frequency.freq_type: frequency.value * counter}
date = start_date + relativedelta(**diff)
if eomonth:
next_month = 1 if date.month == 12 else date.month + 1
date = date.replace(day=1).replace(month=next_month) - relativedelta(days=1)
date += relativedelta(months=1, day=1, days=-1)
if date <= end_date:
if date > end_date:
if not ensure_coverage:
break
elif dates[-1] >= end_date:
break
counter += 1
if frequency.days > 1 or not skip_weekends:
dates.append(date)
elif date.weekday() < 5:
dates.append(date)
else:
raise ValueError("Cannot generate a series containing more than 100000 dates")
return Series(dates, data_type="date")
return Series(dates, dtype="date")
class TimeSeries(TimeSeriesCore):
@ -100,26 +117,34 @@ class TimeSeries(TimeSeriesCore):
* List of dictionaries with 2 keys, first representing date & second representing value
* Dictionary of key: value pairs
date_format : str, optional, default "%Y-%m-%d"
Specify the format of the date
Required only if the first argument of tuples is a string. Otherwise ignored.
frequency : str, optional, default "infer"
The frequency of the time series. Default is infer.
The class will try to infer the frequency automatically and adjust to the closest member.
Note that inferring frequencies can fail if the data is too irregular.
Valid values are {D, W, M, Q, H, Y}
validate_frequency: boolean, default True
Whether the provided frequency should be validated against the data.
When set to True, if the number of data points is not within the expected limits,
it will raise an Exception and object creation will fail.
This parameter will be ignored if frequency is not provided.
refer core._validate_frequency for more details.
date_format : str, optional, default "%Y-%m-%d"
Specify the format of the date
Required only if the first argument of tuples is a string. Otherwise ignored.
"""
def __init__(
self,
data: Union[List[Iterable], Mapping],
frequency: Literal["D", "W", "M", "Q", "H", "Y"],
data: List[Iterable] | Mapping,
frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
validate_frequency: bool = True,
date_format: str = "%Y-%m-%d",
):
"""Instantiate a TimeSeriesCore object"""
super().__init__(data, frequency, date_format)
super().__init__(data, frequency, validate_frequency, date_format)
def info(self) -> str:
"""Summary info about the TimeSeries object"""
@ -128,7 +153,9 @@ class TimeSeries(TimeSeriesCore):
res_string: str = "First date: {}\nLast date: {}\nNumber of rows: {}"
return res_string.format(self.start_date, self.end_date, total_dates)
def ffill(self, inplace: bool = False, limit: int = None) -> Union[TimeSeries, None]:
def ffill(
self, inplace: bool = False, limit: int = 1000, skip_weekends: bool = False, eomonth: bool = None
) -> TimeSeries | None:
"""Forward fill missing dates in the time series
Parameters
@ -139,21 +166,32 @@ class TimeSeries(TimeSeriesCore):
limit : int, optional
Maximum number of periods to forward fill
skip_weekends: bool, optional, default false
Skip weekends while forward filling daily data
Returns
-------
Returns a TimeSeries object if inplace is False, otherwise None
"""
if eomonth is None:
eomonth = _is_eomonth(self.dates)
eomonth: bool = True if self.frequency.days >= AllFrequencies.M.days else False
dates_to_fill = create_date_series(self.start_date, self.end_date, self.frequency.symbol, eomonth)
dates_to_fill = create_date_series(
self.start_date, self.end_date, self.frequency.symbol, eomonth, skip_weekends=skip_weekends
)
new_ts = dict()
counter = 0
for cur_date in dates_to_fill:
try:
cur_val = self.data[cur_date]
new_val = self[cur_date]
cur_val = new_val
counter = 0
except KeyError:
pass
new_ts.update({cur_date: cur_val})
if counter >= limit:
continue
counter += 1
new_ts.update({cur_date: cur_val[1]})
if inplace:
self.data = new_ts
@ -161,7 +199,9 @@ class TimeSeries(TimeSeriesCore):
return self.__class__(new_ts, frequency=self.frequency.symbol)
def bfill(self, inplace: bool = False, limit: int = None) -> Union[TimeSeries, None]:
def bfill(
self, inplace: bool = False, limit: int = 1000, skip_weekends: bool = False, eomonth: bool = None
) -> TimeSeries | None:
"""Backward fill missing dates in the time series
Parameters
@ -172,23 +212,35 @@ class TimeSeries(TimeSeriesCore):
limit : int, optional
Maximum number of periods to back fill
skip_weekends: bool, optional, default false
Skip weekends while forward filling daily data
Returns
-------
Returns a TimeSeries object if inplace is False, otherwise None
"""
if eomonth is None:
eomonth = _is_eomonth(self.dates)
eomonth: bool = True if self.frequency.days >= AllFrequencies.M.days else False
dates_to_fill = create_date_series(self.start_date, self.end_date, self.frequency.symbol, eomonth)
dates_to_fill = create_date_series(
self.start_date, self.end_date, self.frequency.symbol, eomonth, skip_weekends=skip_weekends
)
dates_to_fill.append(self.end_date)
bfill_ts = dict()
counter = 0
for cur_date in reversed(dates_to_fill):
try:
cur_val = self.data[cur_date]
new_val = self[cur_date]
cur_val = new_val
counter = 0
except KeyError:
pass
bfill_ts.update({cur_date: cur_val})
new_ts = {k: bfill_ts[k] for k in reversed(bfill_ts)}
if counter >= limit:
continue
counter += 1
bfill_ts.update({cur_date: cur_val[1]})
# new_ts = {k: bfill_ts[k] for k in reversed(bfill_ts)}
new_ts = dict(list(reversed(bfill_ts.items())))
if inplace:
self.data = new_ts
return None
@ -198,7 +250,7 @@ class TimeSeries(TimeSeriesCore):
@date_parser(1)
def calculate_returns(
self,
as_on: Union[str, datetime.datetime],
as_on: str | datetime.datetime,
return_actual_date: bool = True,
as_on_match: str = "closest",
prior_match: str = "closest",
@ -209,7 +261,7 @@ class TimeSeries(TimeSeriesCore):
return_period_unit: Literal["years", "months", "days"] = "years",
return_period_value: int = 1,
date_format: str = None,
) -> float:
) -> Tuple[datetime.datetime, float]:
"""Method to calculate returns for a certain time-period as on a particular date
Parameters
@ -242,7 +294,7 @@ class TimeSeries(TimeSeriesCore):
* fail: Raise a ValueError
* nan: Return nan as the value
compounding : bool, optional
annual_compounded_returns : bool, optional
Whether the return should be compounded annually.
return_period_unit : 'years', 'months', 'days'
@ -268,18 +320,24 @@ class TimeSeries(TimeSeriesCore):
Example
--------
>>> calculate_returns(datetime.date(2020, 1, 1), years=1)
>>> ts.calculate_returns(datetime.date(2020, 1, 1), years=1)
(datetime.datetime(2020, 1, 1, 0, 0), .0567)
"""
as_on_delta, prior_delta = _preprocess_match_options(as_on_match, prior_match, closest)
prev_date = as_on - relativedelta(**{return_period_unit: return_period_value})
current = _find_closest_date(self.data, as_on, closest_max_days, as_on_delta, if_not_found)
if current[1] != str("nan"):
previous = _find_closest_date(self.data, prev_date, closest_max_days, prior_delta, if_not_found)
current = _find_closest_date(self, as_on, closest_max_days, as_on_delta, if_not_found)
if current[1] == str("nan") or previous[1] == str("nan"):
prev_date = as_on - relativedelta(**{return_period_unit: return_period_value})
if current[1] != str("nan"):
previous = _find_closest_date(self, prev_date, closest_max_days, prior_delta, if_not_found)
if (
current[1] == str("nan")
or previous[1] == str("nan")
or current[0] == str("nan")
or previous[0] == str("nan")
):
return as_on, float("NaN")
returns = current[1] / previous[1]
@ -291,8 +349,8 @@ class TimeSeries(TimeSeriesCore):
@date_parser(1, 2)
def calculate_rolling_returns(
self,
from_date: Union[datetime.date, str],
to_date: Union[datetime.date, str],
from_date: datetime.date | str = None,
to_date: datetime.date | str = None,
frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
as_on_match: str = "closest",
prior_match: str = "closest",
@ -315,16 +373,16 @@ class TimeSeries(TimeSeriesCore):
End date for the returns calculation.
frequency : str, optional
Frequency at which the returns should be calcualated.
Frequency at which the returns should be calculated.
Valid values are {D, W, M, Q, H, Y}
as_on_match : str, optional
The match mode to be used for the as on date.
If not specified, the value for the closes parameter will be used.
If not specified, the value for the closest parameter will be used.
prior_match : str, optional
The match mode to be used for the prior date, i.e., the date against which the return will be calculated.
If not specified, the value for the closes parameter will be used.
If not specified, the value for the closest parameter will be used.
closest : previous | next | exact
The default match mode for dates.
@ -342,7 +400,7 @@ class TimeSeries(TimeSeriesCore):
For instance, if the input date is before the starting of the first date of the time series,
but match mode is set to previous. A DateOutOfRangeError will be raised in such cases.
compounding : bool, optional
annual_compounded_returns : bool, optional
Should the returns be compounded annually.
return_period_unit : years | month | days
@ -357,7 +415,7 @@ class TimeSeries(TimeSeriesCore):
Returns
-------
Returs the rolling returns as a TimeSeries object.
Returns the rolling returns as a TimeSeries object.
Raises
------
@ -376,6 +434,13 @@ class TimeSeries(TimeSeriesCore):
frequency = getattr(AllFrequencies, frequency)
except AttributeError:
raise ValueError(f"Invalid argument for frequency {frequency}")
if from_date is None:
from_date = self.start_date + relativedelta(
days=math.ceil(_interval_to_years(return_period_unit, return_period_value) * 365)
)
if to_date is None:
to_date = self.end_date
dates = create_date_series(from_date, to_date, frequency.symbol)
if frequency == AllFrequencies.D:
@ -395,13 +460,13 @@ class TimeSeries(TimeSeriesCore):
)
rolling_returns.append(returns)
rolling_returns.sort()
return self.__class__(rolling_returns, self.frequency.symbol)
return self.__class__(rolling_returns, frequency.symbol)
@date_parser(1, 2)
def volatility(
self,
from_date: Union[datetime.date, str] = None,
to_date: Union[datetime.date, str] = None,
from_date: datetime.date | str = None,
to_date: datetime.date | str = None,
annualize_volatility: bool = True,
traded_days: int = None,
frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
@ -416,7 +481,7 @@ class TimeSeries(TimeSeriesCore):
) -> float:
"""Calculates the volatility of the time series.add()
The volatility is calculated as the standard deviaion of periodic returns.
The volatility is calculated as the standard deviation of periodic returns.
The periodicity of returns is based on the periodicity of underlying data.
Parameters:
@ -438,7 +503,7 @@ class TimeSeries(TimeSeriesCore):
Only used when annualizing volatility for a time series with daily frequency.
If not provided, will use the value in FincalOptions.traded_days.
Remaining options are passed on to rolling_return function.
Remaining options are passed on to calculate_rolling_returns function.
Returns:
-------
@ -449,7 +514,7 @@ class TimeSeries(TimeSeriesCore):
ValueError: If frequency string is outside valid values
Also see:
--------
---------
TimeSeries.calculate_rolling_returns()
"""
@ -465,9 +530,12 @@ class TimeSeries(TimeSeriesCore):
from_date = self.start_date + relativedelta(**{return_period_unit: return_period_value})
if to_date is None:
to_date = self.end_date
years = _interval_to_years(return_period_unit, return_period_value)
if annual_compounded_returns is None:
annual_compounded_returns = False if frequency.days <= 366 else True
if years > 1:
annual_compounded_returns = True
else:
annual_compounded_returns = False
rolling_returns = self.calculate_rolling_returns(
from_date=from_date,
@ -484,12 +552,12 @@ class TimeSeries(TimeSeriesCore):
sd = statistics.stdev(rolling_returns.values)
if annualize_volatility:
if traded_days is None:
traded_days = FincalOptions.traded_days
traded_days = PyfactsOptions.traded_days
if return_period_unit == "months":
sd *= math.sqrt(12)
sd *= math.sqrt(12 / return_period_value)
elif return_period_unit == "days":
sd *= math.sqrt(traded_days)
sd *= math.sqrt(traded_days / return_period_value)
return sd
@ -499,6 +567,7 @@ class TimeSeries(TimeSeriesCore):
Parameters
----------
kwargs: parameters to be passed to the calculate_rolling_returns() function
Refer TimeSeries.calculate_rolling_returns() method for more details
Returns
-------
@ -509,19 +578,32 @@ class TimeSeries(TimeSeriesCore):
---------
TimeSeries.calculate_rolling_returns()
"""
kwargs["return_period_unit"] = kwargs.get("return_period_unit", self.frequency.freq_type)
kwargs["return_period_value"] = kwargs.get("return_period_value", 1)
kwargs["to_date"] = kwargs.get("to_date", self.end_date)
if kwargs.get("from_date", None) is None:
start_date = self.start_date + relativedelta(
years = _interval_to_years(kwargs["return_period_unit"], kwargs["return_period_value"])
if kwargs.get("annual_compounded_returns", True):
if years >= 1:
kwargs["annual_compounded_returns"] = True
annualise_returns = False
else:
kwargs["annual_compounded_returns"] = False
annualise_returns = True
elif not kwargs["annual_compounded_returns"]:
annualise_returns = False
if kwargs.get("from_date") is None:
kwargs["from_date"] = self.start_date + relativedelta(
**{kwargs["return_period_unit"]: kwargs["return_period_value"]}
)
kwargs["from_date"] = start_date
kwargs["to_date"] = kwargs.get("to_date", self.end_date)
rr = self.calculate_rolling_returns(**kwargs)
return statistics.mean(rr.values)
mean_rr = statistics.mean(filter(lambda x: str(x) != "nan", rr.values))
if annualise_returns:
mean_rr = (1 + mean_rr) ** (1 / years) - 1
return mean_rr
def max_drawdown(self) -> MaxDrawdown:
"""Calculates the maximum fall the stock has taken between any two points.
@ -552,8 +634,42 @@ class TimeSeries(TimeSeriesCore):
return max_drawdown
def expand(
self, to_frequency: Literal["D", "W", "M", "Q", "H"], method: Literal["ffill", "bfill", "interpolate"]
self,
to_frequency: Literal["D", "W", "M", "Q", "H"],
method: Literal["ffill", "bfill"],
skip_weekends: bool = False,
eomonth: bool = False,
) -> TimeSeries:
"""Expand a time series to a higher frequency.
Parameters
----------
to_frequency : "D", "W", "M", "Q", "H"
Frequency to which the TimeSeries will be expanded.
Must be higher than the current frequency of the TimeSeries.
method : ffill | bfill
Method to be used to fill missing values.
skip_weekends : bool, optional
Whether weekends should be skipped while expanding to daily.
Will be used only if to_frequency is D
eomonth: bool, optional
Whether dates should be end of month dates when frequency is monthly or lower.
Will be used only if to_frequency is M, Q, or H
Returns
-------
TimeSeries
Returns an object of TimeSeries class
Raises
------
ValueError
* If Frequency cannot be recognised
* If to_frequency is same or lower than the current frequency
"""
try:
to_frequency: Frequency = getattr(AllFrequencies, to_frequency)
except AttributeError:
@ -562,19 +678,305 @@ class TimeSeries(TimeSeriesCore):
if to_frequency.days >= self.frequency.days:
raise ValueError("TimeSeries can be only expanded to a higher frequency")
new_dates = create_date_series(self.start_date, self.end_date, frequency=to_frequency.symbol)
new_ts: dict = {dt: self.data.get(dt, None) for dt in new_dates}
new_dates = create_date_series(
self.start_date,
self.end_date,
frequency=to_frequency.symbol,
skip_weekends=skip_weekends,
eomonth=eomonth,
ensure_coverage=True,
)
closest: str = "previous" if method == "ffill" else "next"
new_ts: dict = {dt: self.get(dt, closest=closest)[1] for dt in new_dates}
output_ts: TimeSeries = TimeSeries(new_ts, frequency=to_frequency.symbol)
if method == "ffill":
output_ts.ffill(inplace=True)
elif method == "bfill":
output_ts.bfill(inplace=True)
else:
raise NotImplementedError(f"Method {method} not implemented")
return output_ts
def shrink(
    self,
    to_frequency: Literal["W", "M", "Q", "H", "Y"],
    method: Literal["ffill", "bfill"],
    skip_weekends: bool = False,
    eomonth: bool = False,
) -> TimeSeries:
    """Reduce the series to a lower frequency by sampling existing values.

    A date series at ``to_frequency`` is generated between the first and last
    date of this series, and each generated date receives the value obtained
    by looking that date up in this series.

    Parameters
    ----------
    to_frequency : "W", "M", "Q", "H", "Y"
        Symbol of the target frequency.
        Must be lower than the current frequency of the TimeSeries.

    method : ffill | bfill
        Direction of the lookup for each generated date:
        ``ffill`` looks up with ``closest="previous"``,
        any other value with ``closest="next"``.

    skip_weekends : bool, optional
        Forwarded unchanged to ``create_date_series``.

    eomonth : bool, optional
        Forwarded unchanged to ``create_date_series``.

    Returns
    -------
    TimeSeries
        A new TimeSeries object at the target frequency.

    Raises
    ------
    ValueError
        * If ``to_frequency`` is not an attribute of ``AllFrequencies``
        * If ``to_frequency`` is the same as or higher than the current frequency
    """
    try:
        target: Frequency = getattr(AllFrequencies, to_frequency)
    except AttributeError:
        raise ValueError(f"Invalid argument for to_frequency {to_frequency}")

    if target.days <= self.frequency.days:
        raise ValueError("TimeSeries can be only shrunk to a lower frequency")

    sample_dates = create_date_series(
        self.start_date,
        self.end_date,
        frequency=target.symbol,
        skip_weekends=skip_weekends,
        eomonth=eomonth,
        ensure_coverage=True,
    )

    lookup_mode: str = "next"
    if method == "ffill":
        lookup_mode = "previous"

    sampled: dict = {dt: self.get(dt, closest=lookup_mode)[1] for dt in sample_dates}
    return TimeSeries(sampled, frequency=target.symbol)
def sync(self, other: TimeSeries, fill_method: Literal["ffill", "bfill"] = "ffill") -> TimeSeries:
    """Synchronize another TimeSeries with the dates of this one

    ``other`` is first converted to the frequency of this series
    (expanded when it is less frequent, shrunk when it is more frequent).
    The result has one entry for every date of this series,
    with the values taken from ``other``.

    Parameters:
    -----------
    other: TimeSeries
        Another object of TimeSeries class whose dates need to be synchronized

    fill_method: ffill | bfill, default ffill
        Method used when converting the frequency of ``other`` and when a date
        of this series is missing from ``other``:
        ``ffill`` takes the previous available value, anything else the next one.

    Returns:
    --------
        A new object of the same class as ``self``, holding the dates of ``self``
        and the values of ``other``

    Raises:
    --------
        Raises TypeError if the other object is not of TimeSeries class
    """
    if not isinstance(other, TimeSeries):
        raise TypeError("Only objects of type TimeSeries can be passed for sync")

    if self.frequency.days < other.frequency.days:
        other = other.expand(to_frequency=self.frequency.symbol, method=fill_method)
    elif self.frequency.days > other.frequency.days:
        # The target must be the frequency of *this* series. Shrinking `other`
        # to its own frequency is rejected by shrink() with a ValueError.
        other = other.shrink(to_frequency=self.frequency.symbol, method=fill_method)

    closest = "previous" if fill_method == "ffill" else "next"
    new_other: dict = {}
    for dt in self.dates:
        if dt in other:
            new_other[dt] = other[dt][1]
        else:
            new_other[dt] = other.get(dt, closest=closest)[1]

    return self.__class__(new_other, frequency=other.frequency.symbol)
def mean(self) -> float:
    """Return the arithmetic mean of the values held in the time series"""
    series_values = self.values
    return statistics.mean(series_values)
def transform(
    self,
    to_frequency: Literal["W", "M", "Q", "H", "Y"],
    method: Literal["sum", "mean"],
    eomonth: bool = False,
    ensure_coverage: bool = True,
    anchor_date: Literal["start", "end"] = "end",
) -> TimeSeries:
    """Transform a time series object into a lower frequency object with an aggregation function.

    A date series at the target frequency is generated between the first and
    last date of this series. Every generated date receives the aggregate of
    all data points that are later than the previous generated date and not
    later than the generated date itself. The first generated date collects
    everything up to and including itself.

    Parameters
    ----------
    to_frequency:
        Frequency to which the time series needs to be transformed

    method:
        Aggregation method to be used. Can be either mean or sum

    eomonth:
        Use end of month dates. Forwarded to ``create_date_series``.

    ensure_coverage:
        Forwarded to ``create_date_series``.

    anchor_date:
        Currently not used by the implementation.
        Each bucket is labelled with the date that closes it, which is what
        the default ``"end"`` describes.

    Returns
    -------
        Returns an object of the same class as ``self``

    Raises
    -------
    ValueError:
        * If invalid input is passed for frequency
        * If invalid input is passed for method
        * If to_frequency is the same as or higher than the current frequency
    """
    try:
        target_frequency: Frequency = getattr(AllFrequencies, to_frequency)
    except AttributeError:
        raise ValueError(f"Invalid argument for to_frequency {to_frequency}")

    if target_frequency.days <= self.frequency.days:
        raise ValueError("TimeSeries can be only shrunk to a lower frequency")

    if method not in ["sum", "mean"]:
        raise ValueError(f"Method not recognised: {method}")

    dates = create_date_series(
        self.start_date,
        self.end_date,
        target_frequency.symbol,
        ensure_coverage=ensure_coverage,
        eomonth=eomonth,
    )

    new_ts_dict = {}
    for idx, date in enumerate(dates):
        if idx == 0:
            cur_data = self[self.dates <= date]
        else:
            # Half-open window: (previous generated date, current generated date]
            cur_data = self[(self.dates <= date) & (self.dates > dates[idx - 1])]

        # `method` was validated above, so anything that is not "sum" is "mean"
        new_ts_dict[date] = sum(cur_data.values) if method == "sum" else cur_data.mean()

    return self.__class__(new_ts_dict, target_frequency.symbol)
def _preprocess_csv(
file_path: str | pathlib.Path, delimiter: str = ",", encoding: str = "utf-8", **kwargs
) -> List[list]:
"""Preprocess csv data"""
if isinstance(file_path, str):
file_path = pathlib.Path(file_path)
if not file_path.exists():
raise ValueError("File not found. Check the file path")
with open(file_path, "r", encoding=encoding) as file:
reader: csv.reader = csv.reader(file, delimiter=delimiter, **kwargs)
csv_data: list = list(reader)
csv_data = [i for i in csv_data if i] # remove blank rows
if not csv_data:
raise ValueError("File is empty")
return csv_data
def read_csv(
    csv_file_path: str | pathlib.Path,
    frequency: Literal["D", "W", "M", "Q", "Y"],
    date_format: str = None,
    col_names: Tuple[str, str] = None,
    col_index: Tuple[int, int] = (0, 1),
    has_header: bool = True,
    skip_rows: int = 0,
    nrows: int = -1,
    delimiter: str = ",",
    encoding: str = "utf-8",
    **kwargs,
) -> TimeSeries:
    """Reads Time Series data directly from a CSV file

    Parameters
    ----------
    csv_file_path:
        path of the csv file to be read.

    frequency:
        frequency of the time series data.

    date_format:
        date format, specified as datetime compatible string

    col_names:
        specify the column headers to be read, as (date column, value column).
        this parameter will allow you to read two columns from a CSV file which may have more columns.
        this parameter overrides col_index parameter and requires has_header to be True.

    col_index:
        specify the column numbers to be read, as (date column, value column).
        this parameter will allow you to read two columns from a CSV file which may have more columns.
        if neither names nor index is specified, the first two columns from the csv file will be read,
        with the first being treated as date.

    has_header:
        specify whether the file has a header row.
        if true, the header row will be ignored while creating the time series data.

    skip_rows:
        the number of non-blank rows at the top of the file to skip.
        when has_header is True, the row following the skipped rows is used as the header.

    nrows:
        the number of rows to be read after the skipped rows.
        when has_header is True, the header row counts towards this number.
        a negative value reads up to the end of the file.

    delimiter:
        specify the delimiter used in the csv file.

    encoding:
        specify the encoding of the csv file.

    kwargs:
        other keyword arguments to be passed on the csv.reader()

    Returns
    -------
        A TimeSeries object built from the selected date and value columns

    Raises
    ------
    ValueError
        If col_names is provided while has_header is False
    """
    data = _preprocess_csv(csv_file_path, delimiter, encoding, **kwargs)

    read_start_row = skip_rows
    read_end_row = skip_rows + nrows if nrows >= 0 else None

    header = None
    if has_header:
        header = data[read_start_row]
        data = data[read_start_row + 1 : read_end_row]
    else:
        # Without a header every row in the requested window is data
        data = data[read_start_row:read_end_row]

    if col_names is not None:
        if header is None:
            raise ValueError("col_names can only be used when has_header is True")
        date_col = header.index(col_names[0])
        value_col = header.index(col_names[1])
    else:
        date_col = col_index[0]
        value_col = col_index[1]

    ts_data = [(i[date_col], i[value_col]) for i in data if i]

    return TimeSeries(ts_data, frequency=frequency, date_format=date_format)
if __name__ == "__main__":
date_series = [

621
pyfacts/statistics.py Normal file
View File

@ -0,0 +1,621 @@
from __future__ import annotations
import datetime
import math
import statistics
from typing import Literal
from pyfacts.core import date_parser
from .pyfacts import TimeSeries, create_date_series
from .utils import _interval_to_years, _preprocess_from_to_date, covariance
# from dateutil.relativedelta import relativedelta
@date_parser(3, 4)
def sharpe_ratio(
    time_series_data: TimeSeries,
    risk_free_data: TimeSeries = None,
    risk_free_rate: float = None,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Calculate the Sharpe ratio of any time series

    Sharpe ratio is a measure of returns per unit of risk,
    where risk is measured by the standard deviation of the returns.

    The formula for Sharpe ratio is:
    (average asset return - risk free rate)/volatility of asset returns

    Both the average rolling return and the volatility are requested in annualized form.

    Parameters
    ----------
    time_series_data:
        The time series for which Sharpe ratio needs to be calculated

    risk_free_data:
        Risk free rates as time series data.
        This should be the time series of risk free returns,
        and not the underlying asset value. Its mean is used as the risk free rate.

    risk_free_rate:
        Risk free rate to be used.
        Either risk_free_data or risk_free_rate needs to be provided.
        If both are provided, the time series data will be used.

    from_date:
        Start date from which returns should be calculated.
        Defaults to the first date of the series plus one return period.

    to_date:
        End date till which returns should be calculated.
        Defaults to the last date of the series.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The value of the specified interval type over which returns needs to be calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.

    Returns
    -------
        Value of Sharpe ratio as a float.

    Raises
    ------
    ValueError
        If risk free data or risk free rate is not provided.
    """
    period_in_years = _interval_to_years(return_period_unit, return_period_value)
    interval_days = math.ceil(period_in_years * 365)

    if from_date is None:
        from_date = time_series_data.start_date + datetime.timedelta(days=interval_days)
    if to_date is None:
        to_date = time_series_data.end_date

    # A risk free series, when given, takes precedence over the scalar rate
    if risk_free_data is not None:
        risk_free_rate = risk_free_data.mean()
    elif risk_free_rate is None:
        raise ValueError("At least one of risk_free_data or risk_free rate is required")

    rolling_kwargs = dict(
        from_date=from_date,
        to_date=to_date,
        frequency=frequency,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        date_format=date_format,
    )

    mean_return = time_series_data.average_rolling_return(**rolling_kwargs, annual_compounded_returns=True)
    excess = mean_return - risk_free_rate
    volatility = time_series_data.volatility(**rolling_kwargs, annualize_volatility=True)
    return excess / volatility
@date_parser(2, 3)
def beta(
    asset_data: TimeSeries,
    market_data: TimeSeries,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Beta is a measure of sensitivity of asset returns to market returns

    The formula for beta is:
    covariance(asset rolling returns, market rolling returns) / variance(market rolling returns)

    Rolling returns are compounded annually only when the return period is longer than one year.

    Parameters
    ----------
    asset_data: TimeSeries
        The time series data of the asset

    market_data: TimeSeries
        The time series data of the relevant market index

    from_date:
        Start date from which returns should be calculated.
        Defaults to the first date of the asset series plus one return period.

    to_date:
        End date till which returns should be calculated.
        Defaults to the last date of the asset series.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The value of the specified interval type over which returns needs to be calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.

    Returns
    -------
        The value of beta as a float.
    """
    interval_years = _interval_to_years(return_period_unit, return_period_value)
    interval_days = math.ceil(interval_years * 365)

    if from_date is None:
        from_date = asset_data.start_date + datetime.timedelta(days=interval_days)
    if to_date is None:
        to_date = asset_data.end_date

    rolling_kwargs = dict(
        from_date=from_date,
        to_date=to_date,
        frequency=frequency,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        date_format=date_format,
        annual_compounded_returns=interval_years > 1,
    )

    asset_returns = asset_data.calculate_rolling_returns(**rolling_kwargs)
    market_returns = market_data.calculate_rolling_returns(**rolling_kwargs)

    joint_variation = covariance(asset_returns.values, market_returns.values)
    market_variance = statistics.variance(market_returns.values)
    return joint_variation / market_variance
@date_parser(4, 5)
def jensens_alpha(
    asset_data: TimeSeries,
    market_data: TimeSeries,
    risk_free_data: TimeSeries = None,
    risk_free_rate: float = None,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """
    This function calculates the Jensen's alpha for a time series.

    The formula for Jensen's alpha is:
        Ri - (Rf + B x (Rm - Rf))
    where:
        Ri = Realized return of the portfolio or investment
        Rf = The risk free rate during the return time frame
        B = Beta of the portfolio or investment
        Rm = Realized return of the market index

    Ri and Rm are measured over the whole span between from_date and to_date,
    and are compounded annually only when that span is longer than 365 days.

    Parameters
    ----------
    asset_data: TimeSeries
        The time series data of the asset

    market_data: TimeSeries
        The time series data of the relevant market index

    risk_free_data:
        Risk free rates as time series data.
        This should be the time series of risk free returns,
        and not the underlying asset value. Its mean is used as the risk free rate.

    risk_free_rate:
        Risk free rate to be used.
        Either risk_free_data or risk_free_rate needs to be provided.
        If both are provided, the time series data will be used.

    from_date:
        Start date from which returns should be calculated.
        Defaults to the first date of the asset series plus one return period.

    to_date:
        End date till which returns should be calculated.
        Defaults to the last date of the asset series.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The value of the specified interval type over which returns needs to be calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.

    Returns
    -------
        The value of Jensen's alpha as a float.

    Raises
    ------
    ValueError
        If risk free data or risk free rate is not provided.
    """
    interval_years = _interval_to_years(return_period_unit, return_period_value)
    interval_days = math.ceil(interval_years * 365)

    if from_date is None:
        from_date = asset_data.start_date + datetime.timedelta(days=interval_days)
    if to_date is None:
        to_date = asset_data.end_date

    common_params = {
        "from_date": from_date,
        "to_date": to_date,
        "frequency": frequency,
        "return_period_unit": return_period_unit,
        "return_period_value": return_period_value,
        "as_on_match": as_on_match,
        "prior_match": prior_match,
        "closest": closest,
        "date_format": date_format,
    }

    num_days = (to_date - from_date).days
    compound_realised_returns = num_days > 365

    # Asset and market returns are measured with identical settings
    realised_params = {
        "as_on": to_date,
        "return_period_unit": "days",
        "return_period_value": num_days,
        "annual_compounded_returns": compound_realised_returns,
        "as_on_match": as_on_match,
        "prior_match": prior_match,
        "closest": closest,
        "date_format": date_format,
    }
    realized_return = asset_data.calculate_returns(**realised_params)
    market_return = market_data.calculate_returns(**realised_params)

    beta_value = beta(asset_data=asset_data, market_data=market_data, **common_params)

    if risk_free_data is None and risk_free_rate is None:
        raise ValueError("At least one of risk_free_data or risk_free rate is required")
    elif risk_free_data is not None:
        risk_free_rate = risk_free_data.mean()

    # Alpha is the realized return in excess of the CAPM expected return,
    # so the beta-scaled market premium is subtracted, not added.
    expected_return = risk_free_rate + beta_value * (market_return[1] - risk_free_rate)
    return realized_return[1] - expected_return
@date_parser(2, 3)
def correlation(
    data1: TimeSeries,
    data2: TimeSeries,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Calculate the correlation between two assets

    Correlation is not calculated directly on the asset prices.
    The prices are first converted into rolling returns,
    and the correlation is then calculated on these returns.
    Hence this function requires all parameters for rolling returns calculations.

    Parameters
    ----------
    data1: TimeSeries
        The first time series data

    data2: TimeSeries
        The second time series data

    from_date:
        Start date from which returns should be calculated.
        Defaults to the first date of data1 plus one return period.

    to_date:
        End date till which returns should be calculated.
        Defaults to the last date of data1.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The value of the specified interval type over which returns needs to be calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.

    Returns
    -------
        The value of the correlation as a float.

    Raises
    ------
    ValueError:
        * If frequency of both TimeSeries do not match
        * If data2 does not cover the period between the from date and to date
    """
    interval_years = _interval_to_years(return_period_unit, return_period_value)
    interval_days = math.ceil(interval_years * 365)

    if from_date is None:
        from_date = data1.start_date + datetime.timedelta(days=interval_days)
    if to_date is None:
        to_date = data1.end_date

    if data1.frequency != data2.frequency:
        raise ValueError("Correlation calculation requires both time series to be of same frequency")

    if from_date < data2.start_date or to_date > data2.end_date:
        raise ValueError("Data between from_date and to_date must be present in both time series")

    rolling_kwargs = dict(
        from_date=from_date,
        to_date=to_date,
        frequency=frequency,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        date_format=date_format,
        annual_compounded_returns=interval_years > 1,
    )

    first_returns = data1.calculate_rolling_returns(**rolling_kwargs)
    second_returns = data2.calculate_rolling_returns(**rolling_kwargs)
    return statistics.correlation(first_returns.values, second_returns.values)
@date_parser(3, 4)
def sortino_ratio(
    time_series_data: TimeSeries,
    risk_free_data: TimeSeries = None,
    risk_free_rate: float = None,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    frequency: Literal["D", "W", "M", "Q", "H", "Y"] = None,
    return_period_unit: Literal["years", "months", "days"] = "years",
    return_period_value: int = 1,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> float:
    """Calculate the Sortino ratio of any time series

    Sortino ratio is a variation of the Sharpe ratio,
    where risk is measured as standard deviation of negative returns only.
    Deviation on the positive side is not undesirable, so positive returns are excluded from the risk measure.

    The formula for Sortino ratio is:
    (average asset return - risk free rate)/volatility of negative asset returns

    Rolling returns are calculated without compounding. Their mean and the standard
    deviation of the negative ones are then annualized using 365 / (return period in days).

    Parameters
    ----------
    time_series_data:
        The time series for which Sortino ratio needs to be calculated

    risk_free_data:
        Risk free rates as time series data.
        This should be the time series of risk free returns,
        and not the underlying asset value. Its mean is used as the risk free rate.

    risk_free_rate:
        Risk free rate to be used.
        Either risk_free_data or risk_free_rate needs to be provided.
        If both are provided, the time series data will be used.

    from_date:
        Start date from which returns should be calculated.
        When omitted it is derived by _preprocess_from_to_date.

    to_date:
        End date till which returns should be calculated.
        When omitted it is derived by _preprocess_from_to_date.

    frequency:
        The frequency at which returns should be calculated.

    return_period_unit: 'years', 'months', 'days'
        The type of time period to use for return calculation.

    return_period_value: int
        The value of the specified interval type over which returns needs to be calculated.

    as_on_match: str, optional
        The mode of matching the as_on_date. Refer closest.

    prior_match: str, optional
        The mode of matching the prior_date. Refer closest.

    closest: str, optional
        The mode of matching the closest date.

    date_format: str, optional
        The date format to use for this operation.
        Should be passed as a datetime library compatible string.

    Returns
    -------
        Value of Sortino ratio as a float.

    Raises
    ------
    ValueError
        If risk free data or risk free rate is not provided.
    """
    period_in_years = _interval_to_years(return_period_unit, return_period_value)
    interval_days = math.ceil(period_in_years * 365)

    from_date, to_date = _preprocess_from_to_date(
        from_date=from_date,
        to_date=to_date,
        time_series=time_series_data,
        align_dates=False,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
    )

    # A risk free series, when given, takes precedence over the scalar rate
    if risk_free_data is not None:
        risk_free_rate = risk_free_data.mean()
    elif risk_free_rate is None:
        raise ValueError("At least one of risk_free_data or risk_free rate is required")

    rolling_returns = time_series_data.calculate_rolling_returns(
        from_date=from_date,
        to_date=to_date,
        frequency=frequency,
        return_period_unit=return_period_unit,
        return_period_value=return_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
        date_format=date_format,
        annual_compounded_returns=False,
        if_not_found="nan",
    )

    # Dates without data come back as NaN and must not influence the mean
    mean_return = statistics.mean(r for r in rolling_returns.values if str(r) != "nan")
    annualized_return = (1 + mean_return) ** (365 / interval_days) - 1
    excess = annualized_return - risk_free_rate

    # NaN never compares below zero, so it is excluded here as well
    negative_returns = [r for r in rolling_returns.values if r < 0]
    downside_deviation = statistics.stdev(negative_returns) * math.sqrt(365 / interval_days)

    return excess / downside_deviation
@date_parser(3, 4)
def moving_average(
    time_series_data: TimeSeries,
    moving_average_period_unit: Literal["years", "months", "days"],
    moving_average_period_value: int,
    from_date: str | datetime.datetime = None,
    to_date: str | datetime.datetime = None,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next"] = "previous",
    date_format: str = None,
) -> TimeSeries:
    """Walk over the moving average windows of a time series

    NOTE(review): this function looks unfinished. It slices the series once per
    date but discards every slice and returns nothing, despite the declared
    return type. datetime.timedelta also has no ``years`` or ``months``
    argument, so those units raise TypeError on the first window.

    Parameters
    ----------
    time_series_data:
        The time series on which the windows are built

    moving_average_period_unit: 'years', 'months', 'days'
        The type of time period that defines the window length.

    moving_average_period_value: int
        The number of units in one window.

    from_date:
        First date for which a window is built.
        When omitted it is derived by _preprocess_from_to_date.

    to_date:
        Last date for which a window is built.
        When omitted it is derived by _preprocess_from_to_date.

    as_on_match, prior_match, closest:
        Date matching options, forwarded to _preprocess_from_to_date.

    date_format: str, optional
        Not used inside the function body.
    """
    from_date, to_date = _preprocess_from_to_date(
        from_date=from_date,
        to_date=to_date,
        time_series=time_series_data,
        align_dates=False,
        return_period_unit=moving_average_period_unit,
        return_period_value=moving_average_period_value,
        as_on_match=as_on_match,
        prior_match=prior_match,
        closest=closest,
    )

    window_dates = create_date_series(from_date, to_date, time_series_data.frequency.symbol)

    for window_end in window_dates:
        window_length = datetime.timedelta(**{moving_average_period_unit: moving_average_period_value})
        window_start = window_end - window_length
        time_series_data[window_start:window_end]

265
pyfacts/utils.py Normal file
View File

@ -0,0 +1,265 @@
from __future__ import annotations
import datetime
import statistics
from dataclasses import dataclass
from typing import List, Literal, Mapping, Sequence, Tuple
from dateutil.relativedelta import relativedelta
from .exceptions import DateNotFoundError, DateOutOfRangeError
@dataclass
class PyfactsOptions:
    """Container for package-wide default settings, read as class attributes."""

    # Default strptime-compatible format; _parse_date falls back to it when no format is passed.
    date_format: str = "%Y-%m-%d"
    # NOTE(review): presumably the default direction for closest-date matching
    # ("previous" or "next") - confirm against the callers.
    closest: str = "previous" # next
    # NOTE(review): presumably the number of days per year used for annualization - confirm.
    traded_days: int = 365
    # NOTE(review): presumably the default matching mode of TimeSeries.get() - confirm.
    get_closest: str = "exact"
def _parse_date(date: str, date_format: str = None) -> datetime.datetime:
"""Parses date and handles errors
Parameters:
-----------
date: str | datetime.date
The date to be parsed.
If the date passed is already a datetime object, it will return it unprocessed.
date_format: str, default None
The format of the date string in datetime.strftime friendly format.
If format is None, format in FincalOptions.date_format will be used.
Returns:
--------
Returns a datetime.datetime object.
Raises:
-------
TypeError: If the is not a date-like string
ValueError: If the date could not be parsed with the given format
"""
if isinstance(date, (datetime.datetime, datetime.date)):
return datetime.datetime.fromordinal(date.toordinal())
if date_format is None:
date_format = PyfactsOptions.date_format
try:
date = datetime.datetime.strptime(date, date_format)
except TypeError:
raise ValueError("Date does not seem to be valid date-like string")
except ValueError:
raise ValueError("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?")
return date
def _preprocess_timeseries(
data: Sequence[Tuple[str | datetime.datetime, float]]
| Sequence[Mapping[str | datetime.datetime, float]]
| Mapping[str | datetime.datetime, float],
date_format: str,
) -> List[Tuple[datetime.datetime, float]]:
"""Converts any type of list to the TimeSeries friendly format.
This function is internally called by the __init__ function of the TimeSeriesCore class
The TimeSeries class can internally process a list of Tuples.
However, users have the option of passing a variety of types.
This function preprocesses the data and converts it into the relevant format.
If the data is a dictionary, it will be converted using .items() iteration.
If the data is not a dictionary or a list, it will raise an error.
If the data is of list type:
* If the first item is also of list type, it will be parsed as a list of lists
* If the first item is a dictionary with one key, then key will be parsed as date
* If the first item is a dictionary with two keys, then first key will be date and second will be value
* If the first element is of another type, it will raise an error
The final return value is sorted by date
Parameters:
-----------
Data:
The data for the time series. Can be a dictionary, a list of tuples, or a list of dictionaries.
date_format: str
The format of the date in strftime friendly format.
Returns:
-----------
Returns a list of Tuples where the first element of each tuple is of datetime.datetime class
and the second element is of float class
Raises:
--------
TypeError: If the data is not in a format which can be parsed.
"""
if isinstance(data, Mapping):
current_data: List[tuple] = [(k, v) for k, v in data.items()]
return _preprocess_timeseries(current_data, date_format)
# If data is not a dictionary or list, it cannot be parsed
if not isinstance(data, Sequence):
raise TypeError("Could not parse the data")
if isinstance(data[0], Sequence):
return sorted([(_parse_date(i, date_format), float(j)) for i, j in data])
# If first element is not a dictionary or tuple, it cannot be parsed
if not isinstance(data[0], Mapping):
raise TypeError("Could not parse the data")
if len(data[0]) == 1:
current_data: List[tuple] = [tuple(*i.items()) for i in data]
elif len(data[0]) == 2:
current_data: List[tuple] = [tuple(i.values()) for i in data]
else:
raise TypeError("Could not parse the data")
return _preprocess_timeseries(current_data, date_format)
def _preprocess_match_options(as_on_match: str, prior_match: str, closest: str) -> Tuple[datetime.timedelta]:
"""Checks the arguments and returns appropriate timedelta objects"""
deltas = {"exact": 0, "previous": -1, "next": 1}
if closest not in deltas.keys():
raise ValueError(f"Invalid argument for closest: {closest}")
as_on_match: str = closest if as_on_match == "closest" else as_on_match
prior_match: str = closest if prior_match == "closest" else prior_match
if as_on_match in deltas.keys():
as_on_delta: datetime.timedelta = datetime.timedelta(days=deltas[as_on_match])
else:
raise ValueError(f"Invalid as_on_match argument: {as_on_match}")
if prior_match in deltas.keys():
prior_delta: datetime.timedelta = datetime.timedelta(days=deltas[prior_match])
else:
raise ValueError(f"Invalid prior_match argument: {prior_match}")
return as_on_delta, prior_delta
def _preprocess_from_to_date(
    from_date: datetime.date | str,
    to_date: datetime.date | str,
    time_series: Mapping = None,
    align_dates: bool = True,
    return_period_unit: Literal["years", "months", "days"] = None,
    return_period_value: int = None,
    as_on_match: str = "closest",
    prior_match: str = "closest",
    closest: Literal["previous", "next", "exact"] = "previous",
) -> tuple:
    """Fill in a missing from_date and/or to_date using the time series

    A missing from_date becomes the date in the series closest to
    (series start date + one return period), searched in the direction given by as_on_match.
    A missing to_date becomes the end date of the series.
    Dates that are passed in are returned unchanged.

    ``align_dates`` is accepted but not used by the current implementation.

    Returns the pair (from_date, to_date).

    Raises ValueError:
        * If a date is missing and no time series is given
        * If a time series is given without both return period arguments
        * If any of the match options is not recognised
    """
    as_on_step, _ = _preprocess_match_options(as_on_match, prior_match, closest)

    a_date_is_missing = from_date is None or to_date is None
    if a_date_is_missing and time_series is None:
        raise ValueError("Provide either to_date and from_date or time_series data")

    period_is_missing = return_period_unit is None or return_period_value is None
    if time_series is not None and period_is_missing:
        raise ValueError("Provide return period for calculation of from_date")

    if from_date is None:
        one_period = relativedelta(**{return_period_unit: return_period_value})
        expected_start_date = time_series.start_date + one_period
        closest_row = _find_closest_date(time_series, expected_start_date, 999, as_on_step, "fail")
        from_date = closest_row[0]

    if to_date is None:
        to_date = time_series.end_date

    return from_date, to_date
def _find_closest_date(
    data: Mapping[datetime.datetime, float],
    date: datetime.datetime,
    limit_days: int,
    delta: datetime.timedelta,
    if_not_found: Literal["fail", "nan"],
) -> Tuple[datetime.datetime, float]:
    """Find the row for `date`, stepping by `delta` until one is available.

    The search is iterative, so large gaps in the data cannot exhaust the
    interpreter's recursion limit, and the series bound is computed once
    instead of on every step.

    Parameters
    ----------
    data :
        TimeSeries data. The object must expose `.data` (its dates, used for
        the min/max bound) and `.get(date, default)`.
    date : datetime.datetime
        Date to look up first.
    limit_days : int
        Maximum number of `delta` steps to take after the first lookup.
        0 means only `date` itself is tried. A negative value never reaches
        0, so the step count is then unlimited.
    delta : datetime.timedelta
        Step applied after each miss. A zero delta disables stepping.
    if_not_found : "fail" | "nan"
        What to do when no row is found.

    Returns
    -------
    tuple
        The row returned by `data.get` when a date matches. With
        ``if_not_found="nan"``: ``(last_date_tried, NaN)`` when the steps run
        out, or ``(NaN, NaN)`` when the search leaves the range of the data.

    Raises
    ------
    DateOutOfRangeError
        If the search moves before the first / after the last date in the
        data and `if_not_found` is not "nan".
    DateNotFoundError
        If the steps run out and `if_not_found` is "fail".
    ValueError
        If the steps run out and `if_not_found` is neither "fail" nor "nan".
    """
    # Only the bound in the direction of travel can ever be crossed.
    earliest = min(data.data) if delta.days < 0 else None
    latest = max(data.data) if delta.days > 0 else None

    while True:
        if earliest is not None and date < earliest:
            if if_not_found == "nan":
                return float("NaN"), float("NaN")
            raise DateOutOfRangeError(date, "min")
        if latest is not None and date > latest:
            if if_not_found == "nan":
                return float("NaN"), float("NaN")
            raise DateOutOfRangeError(date, "max")

        row: tuple = data.get(date, None)
        if row is not None:
            return row

        if not delta or limit_days == 0:
            break
        date = date + delta
        limit_days -= 1

    if if_not_found == "fail":
        raise DateNotFoundError("Data not found for date", date)
    if if_not_found == "nan":
        return date, float("NaN")
    raise ValueError(f"Invalid argument for if_not_found: {if_not_found}")
def _interval_to_years(interval_type: Literal["years", "months", "days"], interval_value: int) -> float:
    """Convert an interval given in years, months or days into years.

    Used with compounding functions, which work in years. The conversion
    uses 12 months per year and 365 days per year.

    Parameters
    ----------
    interval_type : "years" | "months" | "days"
        Unit of `interval_value`.
    interval_value : int
        Length of the interval in `interval_type` units.

    Returns
    -------
    float
        The interval expressed in years.

    Raises
    ------
    KeyError
        If `interval_type` is not one of the three supported units.
    """
    units_per_year = {"years": 1, "months": 12, "days": 365}
    return interval_value / units_per_year[interval_type]
def _is_eomonth(dates: Sequence[datetime.datetime], threshold: float = 0.7) -> bool:
    """Check whether a series should be treated as an end-of-month date series.

    A date counts as end-of-month when the following day falls in a
    different month. If the proportion of such dates exceeds `threshold`,
    the series is treated as an end-of-month series.
    This can be used for any frequency, but will work only for monthly and
    lower frequencies.

    Parameters
    ----------
    dates : Sequence[datetime.datetime]
        Dates to inspect.
    threshold : float
        Proportion (0-1) of end-of-month dates that must be exceeded.

    Returns
    -------
    bool
        True if the end-of-month proportion is strictly greater than
        `threshold`; False otherwise, including for an empty sequence.
    """
    total = len(dates)
    if total == 0:
        # No dates means no evidence of an end-of-month pattern (and avoids
        # dividing by zero below).
        return False

    one_day = datetime.timedelta(days=1)
    eomonth_count = sum((date + one_day).month != date.month for date in dates)
    return eomonth_count / total > threshold
def covariance(series1: list, series2: list) -> float:
    """Returns the sample covariance of two series

    This is a compatibility function for Python versions prior to 3.10.
    It will be replaced with statistics.covariance when support is dropped for versions <3.10.
    To be a drop-in equivalent it divides by ``n - 1`` (sample covariance),
    exactly as statistics.covariance does.

    Parameters
    ----------
    series1 : List
        A list of numbers

    series2 : list
        A list of numbers

    Returns
    -------
    float
        Returns the covariance as a float value

    Raises
    ------
    ValueError
        If the two series differ in length or contain fewer than two points.
    """
    n = len(series1)
    if len(series2) != n:
        raise ValueError("Lenght of both series must be same for covariance calcualtion.")
    if n < 2:
        raise ValueError("At least two data poitns are required for covariance calculation.")

    mean1 = statistics.mean(series1)
    mean2 = statistics.mean(series2)
    xy = sum((x - mean1) * (y - mean2) for x, y in zip(series1, series2))

    # n - 1 rather than n, matching statistics.covariance; this is also why
    # at least two data points are required above.
    return xy / (n - 1)

Binary file not shown.

View File

@ -2,21 +2,17 @@ from setuptools import find_packages, setup
license = open("LICENSE").read().strip()
setup(
name="Fincal",
version='0.0.1',
name="pyfacts",
version="0.0.1",
license=license,
author="Gourav Kumar",
author_email="gouravkr@outlook.in",
url="https://gouravkumar.com",
description="A library which makes handling time series data easier",
description="A Python library to perform financial analytics on Time Series data",
long_description=open("README.md").read().strip(),
packages=find_packages(),
install_requires=["python-dateutil"],
test_suite="tests",
entry_points={
"console_scripts": [
"fincal=fincal.__main__:main",
]
},
)

35
test.py
View File

@ -1,35 +0,0 @@
# type: ignore
import datetime
import time
import pandas as pd
from fincal.fincal import TimeSeries
# Load the daily NAV history, order it by scheme and date, and keep the rows
# of scheme 118825 as (date, nav) pairs.
nav_history = pd.read_csv('test_files/nav_history_daily.csv')
nav_history = nav_history.sort_values(by=['amfi_code', 'date'])  # type: ignore
scheme_rows = nav_history[nav_history.amfi_code == 118825]
data_list = [(row.date, row.nav) for row in scheme_rows.itertuples()]

# Time the construction of the TimeSeries.
start = time.time()
ts_data = TimeSeries(data_list, frequency='M')
print(f"Instantiation took {round((time.time() - start)*1000, 2)} ms")
# ts_data.fill_missing_days()

# Time the rolling-returns calculation.
start = time.time()
# ts_data.calculate_returns(as_on=datetime.datetime(2022, 1, 4), closest='next', years=1)
rr = ts_data.calculate_rolling_returns(
    datetime.datetime(2015, 1, 1),
    datetime.datetime(2022, 1, 21),
    frequency='M',
    as_on_match='next',
    prior_match='previous',
    closest='previous',
    years=1,
)
# ffill_data = ts_data.bfill()
print(f"Calculation took {round((time.time() - start)*1000, 2)} ms")

# Show the first ten results after sorting.
rr.sort()
for entry in rr[:10]:
    print(entry)
# print(ffill_data)
# print(ts_data)
# print(repr(ts_data))

View File

@ -1,58 +0,0 @@
import pandas as pd
from fincal.fincal import TimeSeries, create_date_series
# Build a daily TimeSeries from the rows of scheme 118825 in the copied
# NAV history file.
nav_rows = pd.read_csv("test_files/nav_history_daily - Copy.csv")
scheme_mask = nav_rows["amfi_code"] == 118825
nav_rows = nav_rows[scheme_mask].reset_index(drop=True)
ts = TimeSeries(
    [(row.date, row.nav) for row in nav_rows.itertuples()],
    frequency="D",
)
# The result is discarded; this only exercises the repr.
repr(ts)
# print(ts[['2022-01-31', '2021-05-28']])
# rr = ts.calculate_rolling_returns(from_date='2021-01-01', to_date='2022-01-01', frequency='D', interval_type='days', interval_value=30, compounding=False)
# data = [
# ("2020-01-01", 10),
# ("2020-02-01", 12),
# ("2020-03-01", 14),
# ("2020-04-01", 16),
# ("2020-05-01", 18),
# ("2020-06-01", 20),
# ("2020-07-01", 22),
# ("2020-08-01", 24),
# ("2020-09-01", 26),
# ("2020-10-01", 28),
# ("2020-11-01", 30),
# ("2020-12-01", 32),
# ("2021-01-01", 34),
# ]
# ts = TimeSeries(data, frequency="M")
# rr = ts.calculate_rolling_returns(
# "2020-02-01",
# "2021-01-01",
# if_not_found="nan",
# compounding=False,
# interval_type="months",
# interval_value=1,
# as_on_match="exact",
# )
# for i in rr:
# print(i)
# returns = ts.calculate_returns(
# "2020-04-25",
# return_actual_date=True,
# closest_max_days=15,
# compounding=True,
# interval_type="days",
# interval_value=90,
# closest="previous",
# if_not_found="fail",
# )
# print(returns)
# Compute the series' volatility with start_date 2018-01-01 and
# end_date 2021-01-01, then print the result.
volatility = ts.volatility(start_date="2018-01-01", end_date="2021-01-01")
print(volatility)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,139 +0,0 @@
amfi_code,date,nav
118825,01-11-2021,87.925
119528,02-11-2021,378.51
118825,02-11-2021,87.885
119528,03-11-2021,377.79
118825,03-11-2021,87.553
119528,08-11-2021,383.13
118825,08-11-2021,88.743
119528,09-11-2021,383.06
118825,09-11-2021,88.793
119528,10-11-2021,382.71
118825,10-11-2021,88.723
118825,10-11-2021,88.78
119528,11-11-2021,379.28
118825,11-11-2021,88.205
119528,12-11-2021,383.94
118825,12-11-2021,89.025
119528,15-11-2021,383.31
118825,15-11-2021,89.182
119528,16-11-2021,381.08
118825,16-11-2021,88.569
119528,17-11-2021,379.17
118825,17-11-2021,88.09
119528,18-11-2021,375.09
118825,18-11-2021,87.202
119528,22-11-2021,368.16
118825,22-11-2021,85.382
119528,23-11-2021,370.64
118825,23-11-2021,85.978
119528,24-11-2021,369.91
118825,24-11-2021,85.635
119528,25-11-2021,371.33
118825,25-11-2021,86.212
119528,26-11-2021,360.66
118825,26-11-2021,83.748
119528,29-11-2021,360.05
118825,29-11-2021,83.523
119528,30-11-2021,359.8
118825,30-11-2021,83.475
119528,01-12-2021,362.35
118825,01-12-2021,84.269
119528,02-12-2021,366.09
118825,02-12-2021,85.105
119528,03-12-2021,363.11
118825,03-12-2021,84.507
119528,06-12-2021,357.21
118825,06-12-2021,83.113
119528,07-12-2021,362.63
118825,07-12-2021,84.429
119528,08-12-2021,368.73
118825,08-12-2021,85.935
119528,09-12-2021,369.49
118825,09-12-2021,86.045
119528,10-12-2021,369.44
118825,10-12-2021,86.058
119528,13-12-2021,367.6
118825,13-12-2021,85.632
119528,14-12-2021,366.36
118825,14-12-2021,85.502
119528,15-12-2021,364.34
118825,15-12-2021,84.989
119528,16-12-2021,363.73
118825,16-12-2021,84.972
119528,17-12-2021,358.17
118825,17-12-2021,83.83
119528,20-12-2021,349.98
118825,20-12-2021,81.817
119528,21-12-2021,353.71
118825,21-12-2021,82.746
119528,22-12-2021,357.93
118825,22-12-2021,83.776
119528,23-12-2021,360.68
118825,23-12-2021,84.297
119528,24-12-2021,359.11
118825,24-12-2021,83.903
119528,27-12-2021,360.71
118825,27-12-2021,84.227
119528,28-12-2021,363.81
118825,28-12-2021,85.044
119528,29-12-2021,363.2
118825,29-12-2021,85.03
119528,30-12-2021,363.31
118825,30-12-2021,85.047
119528,31-12-2021,366.98
118825,31-12-2021,85.759
119528,03-01-2022,371.76
118825,03-01-2022,87.111
119528,04-01-2022,374.22
118825,04-01-2022,87.804
119528,05-01-2022,376.31
118825,05-01-2022,88.162
119528,06-01-2022,373.64
118825,06-01-2022,87.541
119528,07-01-2022,374.68
118825,07-01-2022,87.818
119528,10-01-2022,378.47
118825,10-01-2022,88.622
119528,11-01-2022,379.34
118825,11-01-2022,88.678
119528,12-01-2022,382.86
118825,12-01-2022,89.332
119528,13-01-2022,383.68
118825,13-01-2022,89.553
119528,14-01-2022,384.02
118825,14-01-2022,89.729
119528,17-01-2022,384.36
118825,17-01-2022,89.733
119528,18-01-2022,380
118825,18-01-2022,88.781
119528,19-01-2022,377.24
118825,19-01-2022,88.059
119528,20-01-2022,374.45
118825,20-01-2022,87.361
119528,21-01-2022,369.86
118825,21-01-2022,86.22
119528,24-01-2022,361.01
118825,24-01-2022,83.907
119528,25-01-2022,364.63
118825,25-01-2022,84.763
119528,27-01-2022,361.95
118825,27-01-2022,83.876
119528,28-01-2022,361.91
118825,28-01-2022,83.829
119528,31-01-2022,367.31
118825,31-01-2022,85.18
119528,04-02-2022,371.01
118825,04-02-2022,86.079
119528,07-02-2022,365.04
118825,07-02-2022,84.867
119528,08-02-2022,365.74
118825,08-02-2022,84.945
119528,09-02-2022,369.85
118825,09-02-2022,85.977
119528,10-02-2022,372.29
118825,10-02-2022,86.5
119528,11-02-2022,366.91
118825,11-02-2022,85.226
119528,14-02-2022,355.47
118825,14-02-2022,82.533
1 amfi_code date nav
2 118825 01-11-2021 87.925
3 119528 02-11-2021 378.51
4 118825 02-11-2021 87.885
5 119528 03-11-2021 377.79
6 118825 03-11-2021 87.553
7 119528 08-11-2021 383.13
8 118825 08-11-2021 88.743
9 119528 09-11-2021 383.06
10 118825 09-11-2021 88.793
11 119528 10-11-2021 382.71
12 118825 10-11-2021 88.723
13 118825 10-11-2021 88.78
14 119528 11-11-2021 379.28
15 118825 11-11-2021 88.205
16 119528 12-11-2021 383.94
17 118825 12-11-2021 89.025
18 119528 15-11-2021 383.31
19 118825 15-11-2021 89.182
20 119528 16-11-2021 381.08
21 118825 16-11-2021 88.569
22 119528 17-11-2021 379.17
23 118825 17-11-2021 88.09
24 119528 18-11-2021 375.09
25 118825 18-11-2021 87.202
26 119528 22-11-2021 368.16
27 118825 22-11-2021 85.382
28 119528 23-11-2021 370.64
29 118825 23-11-2021 85.978
30 119528 24-11-2021 369.91
31 118825 24-11-2021 85.635
32 119528 25-11-2021 371.33
33 118825 25-11-2021 86.212
34 119528 26-11-2021 360.66
35 118825 26-11-2021 83.748
36 119528 29-11-2021 360.05
37 118825 29-11-2021 83.523
38 119528 30-11-2021 359.8
39 118825 30-11-2021 83.475
40 119528 01-12-2021 362.35
41 118825 01-12-2021 84.269
42 119528 02-12-2021 366.09
43 118825 02-12-2021 85.105
44 119528 03-12-2021 363.11
45 118825 03-12-2021 84.507
46 119528 06-12-2021 357.21
47 118825 06-12-2021 83.113
48 119528 07-12-2021 362.63
49 118825 07-12-2021 84.429
50 119528 08-12-2021 368.73
51 118825 08-12-2021 85.935
52 119528 09-12-2021 369.49
53 118825 09-12-2021 86.045
54 119528 10-12-2021 369.44
55 118825 10-12-2021 86.058
56 119528 13-12-2021 367.6
57 118825 13-12-2021 85.632
58 119528 14-12-2021 366.36
59 118825 14-12-2021 85.502
60 119528 15-12-2021 364.34
61 118825 15-12-2021 84.989
62 119528 16-12-2021 363.73
63 118825 16-12-2021 84.972
64 119528 17-12-2021 358.17
65 118825 17-12-2021 83.83
66 119528 20-12-2021 349.98
67 118825 20-12-2021 81.817
68 119528 21-12-2021 353.71
69 118825 21-12-2021 82.746
70 119528 22-12-2021 357.93
71 118825 22-12-2021 83.776
72 119528 23-12-2021 360.68
73 118825 23-12-2021 84.297
74 119528 24-12-2021 359.11
75 118825 24-12-2021 83.903
76 119528 27-12-2021 360.71
77 118825 27-12-2021 84.227
78 119528 28-12-2021 363.81
79 118825 28-12-2021 85.044
80 119528 29-12-2021 363.2
81 118825 29-12-2021 85.03
82 119528 30-12-2021 363.31
83 118825 30-12-2021 85.047
84 119528 31-12-2021 366.98
85 118825 31-12-2021 85.759
86 119528 03-01-2022 371.76
87 118825 03-01-2022 87.111
88 119528 04-01-2022 374.22
89 118825 04-01-2022 87.804
90 119528 05-01-2022 376.31
91 118825 05-01-2022 88.162
92 119528 06-01-2022 373.64
93 118825 06-01-2022 87.541
94 119528 07-01-2022 374.68
95 118825 07-01-2022 87.818
96 119528 10-01-2022 378.47
97 118825 10-01-2022 88.622
98 119528 11-01-2022 379.34
99 118825 11-01-2022 88.678
100 119528 12-01-2022 382.86
101 118825 12-01-2022 89.332
102 119528 13-01-2022 383.68
103 118825 13-01-2022 89.553
104 119528 14-01-2022 384.02
105 118825 14-01-2022 89.729
106 119528 17-01-2022 384.36
107 118825 17-01-2022 89.733
108 119528 18-01-2022 380
109 118825 18-01-2022 88.781
110 119528 19-01-2022 377.24
111 118825 19-01-2022 88.059
112 119528 20-01-2022 374.45
113 118825 20-01-2022 87.361
114 119528 21-01-2022 369.86
115 118825 21-01-2022 86.22
116 119528 24-01-2022 361.01
117 118825 24-01-2022 83.907
118 119528 25-01-2022 364.63
119 118825 25-01-2022 84.763
120 119528 27-01-2022 361.95
121 118825 27-01-2022 83.876
122 119528 28-01-2022 361.91
123 118825 28-01-2022 83.829
124 119528 31-01-2022 367.31
125 118825 31-01-2022 85.18
126 119528 04-02-2022 371.01
127 118825 04-02-2022 86.079
128 119528 07-02-2022 365.04
129 118825 07-02-2022 84.867
130 119528 08-02-2022 365.74
131 118825 08-02-2022 84.945
132 119528 09-02-2022 369.85
133 118825 09-02-2022 85.977
134 119528 10-02-2022 372.29
135 118825 10-02-2022 86.5
136 119528 11-02-2022 366.91
137 118825 11-02-2022 85.226
138 119528 14-02-2022 355.47
139 118825 14-02-2022 82.533

View File

@ -1,219 +0,0 @@
"amfi_code","date","nav"
118825,2013-01-31,18.913
118825,2013-02-28,17.723
118825,2013-03-28,17.563
118825,2013-04-30,18.272
118825,2013-05-31,18.383
118825,2013-06-28,17.802
118825,2013-07-31,17.588
118825,2013-08-30,16.993
118825,2013-09-30,17.732
118825,2013-10-31,19.665
118825,2013-11-29,19.787
118825,2013-12-31,20.499
118825,2014-01-31,19.994
118825,2014-02-28,20.942
118825,2014-03-31,22.339
118825,2014-04-30,22.599
118825,2014-05-30,24.937
118825,2014-06-30,27.011
118825,2014-07-31,27.219
118825,2014-08-28,28.625
118825,2014-09-30,29.493
118825,2014-10-31,30.685
118825,2014-11-28,31.956
118825,2014-12-31,31.646
118825,2015-01-30,33.653
118825,2015-02-27,33.581
118825,2015-03-31,33.14
118825,2015-04-30,32.181
118825,2015-05-29,33.256
118825,2015-06-30,33.227
118825,2015-07-31,34.697
118825,2015-08-31,32.833
118825,2015-09-30,32.94
118825,2015-10-30,33.071
118825,2015-11-30,33.024
118825,2015-12-31,33.267
118825,2016-01-29,31.389
118825,2016-02-29,28.751
118825,2016-03-31,32.034
118825,2016-04-29,32.848
118825,2016-05-31,34.135
118825,2016-06-30,35.006
118825,2016-07-29,37.148
118825,2016-08-31,38.005
118825,2016-09-30,37.724
118825,2016-10-28,38.722
118825,2016-11-30,36.689
118825,2016-12-30,36.239
118825,2017-01-31,38.195
118825,2017-02-28,39.873
118825,2017-03-31,41.421
118825,2017-04-28,42.525
118825,2017-05-31,43.977
118825,2017-06-30,43.979
118825,2017-07-31,46.554
118825,2017-08-31,46.383
118825,2017-09-29,46.085
118825,2017-10-31,48.668
118825,2017-11-30,48.824
118825,2017-12-29,50.579
118825,2018-01-31,51.799
118825,2018-02-28,49.041
118825,2018-03-28,46.858
118825,2018-04-30,49.636
118825,2018-05-31,49.169
118825,2018-06-29,48.716
118825,2018-07-31,51.455
118825,2018-08-31,53.494
118825,2018-09-28,49.863
118825,2018-10-31,48.538
118825,2018-11-30,50.597
118825,2018-12-31,50.691
118825,2019-01-31,50.517
118825,2019-02-28,50.176
118825,2019-03-31,54.017
118825,2019-04-30,54.402
118825,2019-05-31,55.334
118825,2019-06-28,55.181
118825,2019-07-31,52.388
118825,2019-08-30,52.214
118825,2019-09-30,54.058
118825,2019-10-31,56.514
118825,2019-11-29,57.42
118825,2019-12-31,57.771
118825,2020-01-31,57.135
118825,2020-02-28,54.034
118825,2020-03-31,41.452
118825,2020-04-30,47.326
118825,2020-05-29,45.845
118825,2020-06-30,49.526
118825,2020-07-31,53.306000000000004
118825,2020-08-19,55.747
118825,2020-10-30,56.387
118825,2020-11-27,62.001000000000005
118825,2020-12-31,66.415
118825,2021-01-29,65.655
118825,2021-02-26,70.317
118825,2021-03-31,70.69
118825,2021-04-30,70.39
118825,2021-05-31,74.85
118825,2021-06-30,77.109
118825,2021-07-30,78.335
118825,2021-08-31,83.691
118825,2021-09-30,86.128
118825,2021-10-29,86.612
118825,2021-11-30,83.475
118825,2021-12-31,85.759
118825,2022-01-31,85.18
118825,2022-02-17,84.33
119528,2013-01-31,101.36
119528,2013-02-28,95.25
119528,2013-03-28,94.81
119528,2013-04-30,99.75
119528,2013-05-31,99.73
119528,2013-06-28,97.52
119528,2013-07-31,95.37
119528,2013-08-30,92.24
119528,2013-09-30,97.45
119528,2013-10-31,107.03
119528,2013-11-29,105.91
119528,2013-12-31,109.3
119528,2014-01-31,105.09
119528,2014-02-28,108.58
119528,2014-03-31,117.28
119528,2014-04-30,118.06
119528,2014-05-30,131.33
119528,2014-06-30,139.48
119528,2014-07-31,140.49
119528,2014-08-28,145.43
119528,2014-09-30,147.4
119528,2014-10-31,154.46
119528,2014-11-28,161.93
119528,2014-12-31,159.62
119528,2015-01-30,170.46
119528,2015-02-27,171.18
119528,2015-03-31,166.8
119528,2015-04-30,161.95
119528,2015-05-29,166.78
119528,2015-06-30,166.67
119528,2015-07-31,172.33
119528,2015-08-31,161.96
119528,2015-09-30,162.25
119528,2015-10-30,164.16
119528,2015-11-30,162.7
119528,2015-12-31,162.83
119528,2016-01-29,155.87
119528,2016-02-29,144.56
119528,2016-03-31,159.88
119528,2016-04-29,163.54
119528,2016-05-31,170.01
119528,2016-06-30,174.61
119528,2016-07-29,184.36
119528,2016-08-31,189.33
119528,2016-09-30,187.16
119528,2016-10-28,189.29
119528,2016-11-30,178.19
119528,2016-12-30,176.66
119528,2017-01-31,185.76
119528,2017-02-28,193.2
119528,2017-03-31,200.54
119528,2017-04-28,205.25
119528,2017-05-31,208.22
119528,2017-06-30,209.83
119528,2017-07-31,221.15
119528,2017-08-31,219.99
119528,2017-09-29,217.7
119528,2017-10-31,226.94
119528,2017-11-30,225.24
119528,2017-12-29,233.26
119528,2018-01-31,237.57
119528,2018-02-28,226.55
119528,2018-03-28,219.73
119528,2018-04-30,232.04
119528,2018-05-31,228.49
119528,2018-06-29,225.27
119528,2018-07-31,237.11
119528,2018-08-31,243.79
119528,2018-09-28,223.83
119528,2018-10-31,218.61
119528,2018-11-30,226.99
119528,2018-12-31,228.61
119528,2019-01-31,224.26
119528,2019-02-28,222.71
119528,2019-03-29,240.21
119528,2019-04-30,240.01
119528,2019-05-31,243.72
119528,2019-06-28,241.28
119528,2019-07-31,229.54
119528,2019-08-30,226.0
119528,2019-09-30,234.75
119528,2019-10-31,242.11
119528,2019-11-29,246.75
119528,2019-12-31,247.81
119528,2020-01-31,246.14
119528,2020-02-28,231.91
119528,2020-03-31,175.98
119528,2020-04-30,200.77
119528,2020-05-29,196.75
119528,2020-06-30,210.55
119528,2020-07-31,224.93
119528,2020-08-19,233.78
119528,2020-10-30,235.83
119528,2020-11-27,264.04
119528,2020-12-31,285.02
119528,2021-01-29,280.52
119528,2021-02-26,300.56
119528,2021-03-31,301.57
119528,2021-04-30,301.1
119528,2021-05-31,320.98
119528,2021-06-30,327.64
119528,2021-07-30,336.6
119528,2021-08-31,360.75
119528,2021-09-30,369.42
119528,2021-10-29,372.89
119528,2021-11-30,359.8
119528,2021-12-31,366.98
119528,2022-01-31,367.31
119528,2022-02-17,363.56
1 amfi_code date nav
2 118825 2013-01-31 18.913
3 118825 2013-02-28 17.723
4 118825 2013-03-28 17.563
5 118825 2013-04-30 18.272
6 118825 2013-05-31 18.383
7 118825 2013-06-28 17.802
8 118825 2013-07-31 17.588
9 118825 2013-08-30 16.993
10 118825 2013-09-30 17.732
11 118825 2013-10-31 19.665
12 118825 2013-11-29 19.787
13 118825 2013-12-31 20.499
14 118825 2014-01-31 19.994
15 118825 2014-02-28 20.942
16 118825 2014-03-31 22.339
17 118825 2014-04-30 22.599
18 118825 2014-05-30 24.937
19 118825 2014-06-30 27.011
20 118825 2014-07-31 27.219
21 118825 2014-08-28 28.625
22 118825 2014-09-30 29.493
23 118825 2014-10-31 30.685
24 118825 2014-11-28 31.956
25 118825 2014-12-31 31.646
26 118825 2015-01-30 33.653
27 118825 2015-02-27 33.581
28 118825 2015-03-31 33.14
29 118825 2015-04-30 32.181
30 118825 2015-05-29 33.256
31 118825 2015-06-30 33.227
32 118825 2015-07-31 34.697
33 118825 2015-08-31 32.833
34 118825 2015-09-30 32.94
35 118825 2015-10-30 33.071
36 118825 2015-11-30 33.024
37 118825 2015-12-31 33.267
38 118825 2016-01-29 31.389
39 118825 2016-02-29 28.751
40 118825 2016-03-31 32.034
41 118825 2016-04-29 32.848
42 118825 2016-05-31 34.135
43 118825 2016-06-30 35.006
44 118825 2016-07-29 37.148
45 118825 2016-08-31 38.005
46 118825 2016-09-30 37.724
47 118825 2016-10-28 38.722
48 118825 2016-11-30 36.689
49 118825 2016-12-30 36.239
50 118825 2017-01-31 38.195
51 118825 2017-02-28 39.873
52 118825 2017-03-31 41.421
53 118825 2017-04-28 42.525
54 118825 2017-05-31 43.977
55 118825 2017-06-30 43.979
56 118825 2017-07-31 46.554
57 118825 2017-08-31 46.383
58 118825 2017-09-29 46.085
59 118825 2017-10-31 48.668
60 118825 2017-11-30 48.824
61 118825 2017-12-29 50.579
62 118825 2018-01-31 51.799
63 118825 2018-02-28 49.041
64 118825 2018-03-28 46.858
65 118825 2018-04-30 49.636
66 118825 2018-05-31 49.169
67 118825 2018-06-29 48.716
68 118825 2018-07-31 51.455
69 118825 2018-08-31 53.494
70 118825 2018-09-28 49.863
71 118825 2018-10-31 48.538
72 118825 2018-11-30 50.597
73 118825 2018-12-31 50.691
74 118825 2019-01-31 50.517
75 118825 2019-02-28 50.176
76 118825 2019-03-31 54.017
77 118825 2019-04-30 54.402
78 118825 2019-05-31 55.334
79 118825 2019-06-28 55.181
80 118825 2019-07-31 52.388
81 118825 2019-08-30 52.214
82 118825 2019-09-30 54.058
83 118825 2019-10-31 56.514
84 118825 2019-11-29 57.42
85 118825 2019-12-31 57.771
86 118825 2020-01-31 57.135
87 118825 2020-02-28 54.034
88 118825 2020-03-31 41.452
89 118825 2020-04-30 47.326
90 118825 2020-05-29 45.845
91 118825 2020-06-30 49.526
92 118825 2020-07-31 53.306000000000004
93 118825 2020-08-19 55.747
94 118825 2020-10-30 56.387
95 118825 2020-11-27 62.001000000000005
96 118825 2020-12-31 66.415
97 118825 2021-01-29 65.655
98 118825 2021-02-26 70.317
99 118825 2021-03-31 70.69
100 118825 2021-04-30 70.39
101 118825 2021-05-31 74.85
102 118825 2021-06-30 77.109
103 118825 2021-07-30 78.335
104 118825 2021-08-31 83.691
105 118825 2021-09-30 86.128
106 118825 2021-10-29 86.612
107 118825 2021-11-30 83.475
108 118825 2021-12-31 85.759
109 118825 2022-01-31 85.18
110 118825 2022-02-17 84.33
111 119528 2013-01-31 101.36
112 119528 2013-02-28 95.25
113 119528 2013-03-28 94.81
114 119528 2013-04-30 99.75
115 119528 2013-05-31 99.73
116 119528 2013-06-28 97.52
117 119528 2013-07-31 95.37
118 119528 2013-08-30 92.24
119 119528 2013-09-30 97.45
120 119528 2013-10-31 107.03
121 119528 2013-11-29 105.91
122 119528 2013-12-31 109.3
123 119528 2014-01-31 105.09
124 119528 2014-02-28 108.58
125 119528 2014-03-31 117.28
126 119528 2014-04-30 118.06
127 119528 2014-05-30 131.33
128 119528 2014-06-30 139.48
129 119528 2014-07-31 140.49
130 119528 2014-08-28 145.43
131 119528 2014-09-30 147.4
132 119528 2014-10-31 154.46
133 119528 2014-11-28 161.93
134 119528 2014-12-31 159.62
135 119528 2015-01-30 170.46
136 119528 2015-02-27 171.18
137 119528 2015-03-31 166.8
138 119528 2015-04-30 161.95
139 119528 2015-05-29 166.78
140 119528 2015-06-30 166.67
141 119528 2015-07-31 172.33
142 119528 2015-08-31 161.96
143 119528 2015-09-30 162.25
144 119528 2015-10-30 164.16
145 119528 2015-11-30 162.7
146 119528 2015-12-31 162.83
147 119528 2016-01-29 155.87
148 119528 2016-02-29 144.56
149 119528 2016-03-31 159.88
150 119528 2016-04-29 163.54
151 119528 2016-05-31 170.01
152 119528 2016-06-30 174.61
153 119528 2016-07-29 184.36
154 119528 2016-08-31 189.33
155 119528 2016-09-30 187.16
156 119528 2016-10-28 189.29
157 119528 2016-11-30 178.19
158 119528 2016-12-30 176.66
159 119528 2017-01-31 185.76
160 119528 2017-02-28 193.2
161 119528 2017-03-31 200.54
162 119528 2017-04-28 205.25
163 119528 2017-05-31 208.22
164 119528 2017-06-30 209.83
165 119528 2017-07-31 221.15
166 119528 2017-08-31 219.99
167 119528 2017-09-29 217.7
168 119528 2017-10-31 226.94
169 119528 2017-11-30 225.24
170 119528 2017-12-29 233.26
171 119528 2018-01-31 237.57
172 119528 2018-02-28 226.55
173 119528 2018-03-28 219.73
174 119528 2018-04-30 232.04
175 119528 2018-05-31 228.49
176 119528 2018-06-29 225.27
177 119528 2018-07-31 237.11
178 119528 2018-08-31 243.79
179 119528 2018-09-28 223.83
180 119528 2018-10-31 218.61
181 119528 2018-11-30 226.99
182 119528 2018-12-31 228.61
183 119528 2019-01-31 224.26
184 119528 2019-02-28 222.71
185 119528 2019-03-29 240.21
186 119528 2019-04-30 240.01
187 119528 2019-05-31 243.72
188 119528 2019-06-28 241.28
189 119528 2019-07-31 229.54
190 119528 2019-08-30 226.0
191 119528 2019-09-30 234.75
192 119528 2019-10-31 242.11
193 119528 2019-11-29 246.75
194 119528 2019-12-31 247.81
195 119528 2020-01-31 246.14
196 119528 2020-02-28 231.91
197 119528 2020-03-31 175.98
198 119528 2020-04-30 200.77
199 119528 2020-05-29 196.75
200 119528 2020-06-30 210.55
201 119528 2020-07-31 224.93
202 119528 2020-08-19 233.78
203 119528 2020-10-30 235.83
204 119528 2020-11-27 264.04
205 119528 2020-12-31 285.02
206 119528 2021-01-29 280.52
207 119528 2021-02-26 300.56
208 119528 2021-03-31 301.57
209 119528 2021-04-30 301.1
210 119528 2021-05-31 320.98
211 119528 2021-06-30 327.64
212 119528 2021-07-30 336.6
213 119528 2021-08-31 360.75
214 119528 2021-09-30 369.42
215 119528 2021-10-29 372.89
216 119528 2021-11-30 359.8
217 119528 2021-12-31 366.98
218 119528 2022-01-31 367.31
219 119528 2022-02-17 363.56

View File

@ -1,11 +0,0 @@
amfi_code,date,nav
118825,31-03-2021,70.69
118825,30-04-2021,70.39
118825,31-05-2021,74.85
118825,30-07-2021,78.335
118825,31-08-2021,83.691
118825,30-09-2021,86.128
118825,29-10-2021,86.612
118825,30-11-2021,83.475
118825,31-01-2022,85.18
118825,17-02-2022,84.33
1 amfi_code date nav
2 118825 31-03-2021 70.69
3 118825 30-04-2021 70.39
4 118825 31-05-2021 74.85
5 118825 30-07-2021 78.335
6 118825 31-08-2021 83.691
7 118825 30-09-2021 86.128
8 118825 29-10-2021 86.612
9 118825 30-11-2021 83.475
10 118825 31-01-2022 85.18
11 118825 17-02-2022 84.33

View File

@ -1,71 +0,0 @@
"amfi_code","date","nav"
118825,2013-03-28,17.563
118825,2013-06-28,17.802
118825,2013-09-30,17.732
118825,2013-12-31,20.499
118825,2014-03-31,22.339
118825,2014-06-30,27.011
118825,2014-09-30,29.493
118825,2014-12-31,31.646
118825,2015-03-31,33.14
118825,2015-06-30,33.227
118825,2015-09-30,32.94
118825,2015-12-31,33.267
118825,2016-03-31,32.034
118825,2016-06-30,35.006
118825,2016-09-30,37.724
118825,2016-12-30,36.239
118825,2017-03-31,41.421
118825,2017-06-30,43.979
118825,2017-09-29,46.085
118825,2017-12-29,50.579
118825,2018-03-28,46.858
118825,2018-06-29,48.716
118825,2018-09-28,49.863
118825,2018-12-31,50.691
118825,2019-03-31,54.017
118825,2019-06-28,55.181
118825,2019-09-30,54.058
118825,2019-12-31,57.771
118825,2020-03-31,41.452
118825,2020-06-30,49.526
118825,2020-12-31,66.415
118825,2021-03-31,70.69
118825,2021-06-30,77.109
118825,2021-09-30,86.128
118825,2021-12-31,85.759
119528,2013-03-28,94.81
119528,2013-06-28,97.52
119528,2013-09-30,97.45
119528,2013-12-31,109.3
119528,2014-03-31,117.28
119528,2014-06-30,139.48
119528,2014-09-30,147.4
119528,2014-12-31,159.62
119528,2015-03-31,166.8
119528,2015-06-30,166.67
119528,2015-09-30,162.25
119528,2015-12-31,162.83
119528,2016-03-31,159.88
119528,2016-06-30,174.61
119528,2016-09-30,187.16
119528,2016-12-30,176.66
119528,2017-03-31,200.54
119528,2017-06-30,209.83
119528,2017-09-29,217.7
119528,2017-12-29,233.26
119528,2018-03-28,219.73
119528,2018-06-29,225.27
119528,2018-09-28,223.83
119528,2018-12-31,228.61
119528,2019-03-29,240.21
119528,2019-06-28,241.28
119528,2019-09-30,234.75
119528,2019-12-31,247.81
119528,2020-03-31,175.98
119528,2020-06-30,210.55
119528,2020-12-31,285.02
119528,2021-03-31,301.57
119528,2021-06-30,327.64
119528,2021-09-30,369.42
119528,2021-12-31,366.98
1 amfi_code date nav
2 118825 2013-03-28 17.563
3 118825 2013-06-28 17.802
4 118825 2013-09-30 17.732
5 118825 2013-12-31 20.499
6 118825 2014-03-31 22.339
7 118825 2014-06-30 27.011
8 118825 2014-09-30 29.493
9 118825 2014-12-31 31.646
10 118825 2015-03-31 33.14
11 118825 2015-06-30 33.227
12 118825 2015-09-30 32.94
13 118825 2015-12-31 33.267
14 118825 2016-03-31 32.034
15 118825 2016-06-30 35.006
16 118825 2016-09-30 37.724
17 118825 2016-12-30 36.239
18 118825 2017-03-31 41.421
19 118825 2017-06-30 43.979
20 118825 2017-09-29 46.085
21 118825 2017-12-29 50.579
22 118825 2018-03-28 46.858
23 118825 2018-06-29 48.716
24 118825 2018-09-28 49.863
25 118825 2018-12-31 50.691
26 118825 2019-03-31 54.017
27 118825 2019-06-28 55.181
28 118825 2019-09-30 54.058
29 118825 2019-12-31 57.771
30 118825 2020-03-31 41.452
31 118825 2020-06-30 49.526
32 118825 2020-12-31 66.415
33 118825 2021-03-31 70.69
34 118825 2021-06-30 77.109
35 118825 2021-09-30 86.128
36 118825 2021-12-31 85.759
37 119528 2013-03-28 94.81
38 119528 2013-06-28 97.52
39 119528 2013-09-30 97.45
40 119528 2013-12-31 109.3
41 119528 2014-03-31 117.28
42 119528 2014-06-30 139.48
43 119528 2014-09-30 147.4
44 119528 2014-12-31 159.62
45 119528 2015-03-31 166.8
46 119528 2015-06-30 166.67
47 119528 2015-09-30 162.25
48 119528 2015-12-31 162.83
49 119528 2016-03-31 159.88
50 119528 2016-06-30 174.61
51 119528 2016-09-30 187.16
52 119528 2016-12-30 176.66
53 119528 2017-03-31 200.54
54 119528 2017-06-30 209.83
55 119528 2017-09-29 217.7
56 119528 2017-12-29 233.26
57 119528 2018-03-28 219.73
58 119528 2018-06-29 225.27
59 119528 2018-09-28 223.83
60 119528 2018-12-31 228.61
61 119528 2019-03-29 240.21
62 119528 2019-06-28 241.28
63 119528 2019-09-30 234.75
64 119528 2019-12-31 247.81
65 119528 2020-03-31 175.98
66 119528 2020-06-30 210.55
67 119528 2020-12-31 285.02
68 119528 2021-03-31 301.57
69 119528 2021-06-30 327.64
70 119528 2021-09-30 369.42
71 119528 2021-12-31 366.98

View File

@ -1,9 +0,0 @@
amfi_code,date,nav
118825,31-03-2019,54.017
118825,28-06-2019,55.181
118825,31-12-2019,57.771
118825,31-03-2020,41.452
118825,30-06-2020,49.526
118825,30-06-2021,77.109
118825,30-09-2021,86.128
118825,31-12-2021,85.759
1 amfi_code date nav
2 118825 31-03-2019 54.017
3 118825 28-06-2019 55.181
4 118825 31-12-2019 57.771
5 118825 31-03-2020 41.452
6 118825 30-06-2020 49.526
7 118825 30-06-2021 77.109
8 118825 30-09-2021 86.128
9 118825 31-12-2021 85.759

View File

@ -1,25 +0,0 @@
import datetime
from fincal.core import Series
# Sample Series objects for manual experiments: one built from floats and
# one from datetimes listed out of chronological order.
s1 = Series([2.5, 6.2, 5.6, 8.4, 7.4, 1.5, 9.6, 5])

dt_lst = [
    datetime.datetime(year, month, day, 0, 0)
    for year, month, day in (
        (2020, 12, 4),
        (2019, 5, 16),
        (2019, 9, 25),
        (2016, 2, 18),
        (2017, 8, 14),
        (2018, 1, 4),
        (2017, 5, 21),
        (2018, 7, 17),
        (2016, 4, 8),
        (2020, 1, 7),
        (2016, 12, 24),
        (2020, 6, 19),
        (2016, 3, 16),
        (2017, 4, 25),
        (2016, 7, 10),
    )
]
s2 = Series(dt_lst)

View File

@ -1,309 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3f7938c0-98e3-43b8-86e8-4f000cda7ce5",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"import pandas as pd\n",
"\n",
"from fincal.fincal import TimeSeries\n",
"from fincal.core import Series"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4b8ccd5f-dfff-4202-82c4-f66a30c122b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 125 ms\n",
"Wall time: 99 ms\n"
]
},
{
"data": {
"text/plain": [
"[(datetime.datetime(2022, 1, 31, 0, 0), 310.980011),\n",
" (datetime.datetime(2021, 5, 28, 0, 0), 249.67999300000002)]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"dfd = pd.read_csv('test_files/msft.csv')\n",
"# dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)\n",
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n",
"repr(ts)\n",
"ts[['2022-01-31', '2021-05-28']]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "ffd9665d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(datetime.datetime(2022, 1, 31, 0, 0), 310.980011)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts['2022-01-31']"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "086d4377-d1b1-4e51-84c0-39dee28ef75e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 15.6 ms\n",
"Wall time: 16 ms\n"
]
},
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2022, 1, 3, 0, 0), 334.75),\n",
"\t (datetime.datetime(2022, 1, 4, 0, 0), 329.01001),\n",
"\t (datetime.datetime(2022, 1, 5, 0, 0), 316.380005)\n",
"\t ...\n",
"\t (datetime.datetime(2022, 2, 16, 0, 0), 299.5),\n",
"\t (datetime.datetime(2022, 2, 17, 0, 0), 290.730011),\n",
"\t (datetime.datetime(2022, 2, 18, 0, 0), 287.929993)], frequency='D')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"s = ts.dates >= '2022-01-01'\n",
"ts[s]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e815edc9-3746-4192-814e-bd27b2771a0c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 15.6 ms\n",
"Wall time: 4 ms\n"
]
},
{
"data": {
"text/plain": [
"[(datetime.datetime(1992, 2, 19, 0, 0), 2.398438),\n",
" (datetime.datetime(1992, 2, 20, 0, 0), 2.447917),\n",
" (datetime.datetime(1992, 2, 21, 0, 0), 2.385417),\n",
" (datetime.datetime(1992, 2, 24, 0, 0), 2.3932290000000003),\n",
" (datetime.datetime(1992, 2, 25, 0, 0), 2.411458),\n",
" (datetime.datetime(1992, 2, 26, 0, 0), 2.541667),\n",
" (datetime.datetime(1992, 2, 27, 0, 0), 2.601563),\n",
" (datetime.datetime(1992, 2, 28, 0, 0), 2.572917),\n",
" (datetime.datetime(1992, 3, 2, 0, 0), 2.5625),\n",
" (datetime.datetime(1992, 3, 3, 0, 0), 2.567708)]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"ts.iloc[:10]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 297 ms\n",
"Wall time: 290 ms\n"
]
}
],
"source": [
"%%time\n",
"from_date = datetime.date(1994, 1, 1)\n",
"to_date = datetime.date(2022, 1, 1)\n",
"# print(ts.calculate_returns(to_date, years=7))\n",
"rr = ts.calculate_rolling_returns(from_date, to_date)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e5d357b4-4fe5-4a0a-8107-0ab6828d7c41",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(1994, 1, 3, 0, 0), -0.06149359306648605),\n",
"\t (datetime.datetime(1994, 1, 4, 0, 0), -0.05433177603118022),\n",
"\t (datetime.datetime(1994, 1, 5, 0, 0), -0.04913276300578029)\n",
"\t ...\n",
"\t (datetime.datetime(2021, 12, 29, 0, 0), 0.5255410267822715),\n",
"\t (datetime.datetime(2021, 12, 30, 0, 0), 0.5306749265370103),\n",
"\t (datetime.datetime(2021, 12, 31, 0, 0), 0.5120942811985818)], frequency='D')"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rr"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4bad2efa",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Series([1.0, 2.0, 3.0, 4.0, 5.0])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sr = Series([1, 2, 3, 4, 5], 'number')\n",
"sr"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "adceda69",
"metadata": {},
"outputs": [],
"source": [
"from fincal.fincal import TimeSeries\n",
"import datetime\n",
"ts = TimeSeries(data = [('2021-01-01', 220), ('2021-02-01', 230), ('2021-03-01', 240)], frequency='M')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "68cf9f8c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(datetime.datetime(2021, 2, 1, 0, 0), 0.045454545454545414)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts.calculate_returns('2021-02-05', interval_type='months', interval_value=1, compounding=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a583347f",
"metadata": {},
"outputs": [],
"source": [
"D = {'a': 1, 'b': 2}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f79ac787",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['a', 'b'])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"D.keys()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

42
tests/README.md Normal file
View File

@ -0,0 +1,42 @@
# Testing Guidelines
PyFacts uses Pytest for unit testing.
All high-level functions are expected to have tests written for them. Each file in the pyfacts module has a dedicated test file, and all tests related to that file go in its dedicated test file.
Since this module needs test data for testing, a Pytest fixture has been defined to generate test data. Use this fixture to generate test data. The fixture uses the random module to generate random test data. A seed has been hardcoded for the random data generator to ensure it generates the same data all the time (if it didn't, tests for specific values would never pass).
WARNING! Do not change the seed for the random data generator. This will cause most tests to fail.
To use the fixture, just pass `create_test_data` as an argument to the test function and then use it within the function. Pytest will automatically locate the relevant function (it need not be imported into the test file).
## Writing tests
Tests are organised as follows:
- Each broad function/method has a Test Class
- All variations should be tested within this class using one or more functions
All test files should be named `test_<module_file_name>.py`.
For instance, the test file for `core.py` is named `test_core.py`.
All class names should begin with the word `Test`.
All function names should begin with the word `test_`.
Ensure that all test functions are independent of each other.
## Running tests
Skip this part if you already know how to run pytest.
Open the terminal. Make sure you are in the root pyfacts folder. Then run the following command:
`pytest tests`
This will run the entire test suite. This can take some time depending on the number of tests and speed of your computer. Hence you might want to run only a few tests.
To run tests within a particular file, say test_core.py, type the following command:
`pytest tests/test_core.py`
If you want to run only a particular class within a file, for instance `TestSetitem` within the `test_core.py` file, run it as follows:
`pytest tests/test_core.py::TestSetitem`
This will run only the specified class, making sure your tests don't take too long.
If you're using VS Code, you can make this whole process easier by configuring pytest within VS Code. It will identify all tests and allow you to run them individually from the testing pane on the left.
### Before you push your code
Before you push your code or raise a PR, ensure that all tests are passing. PRs where any of the tests are failing will not be merged. Any modifications to the code which require a modification to existing tests should be accompanied by a note in the PR explaining why the existing tests had to be modified.

111
tests/conftest.py Normal file
View File

@ -0,0 +1,111 @@
import datetime
import math
import random
from typing import List
import pytest
from dateutil.relativedelta import relativedelta
import pyfacts as pft
def conf_add(n1, n2):
    """Return the result of ``n1 + n2``."""
    total = n1 + n2
    return total
@pytest.fixture
def conf_fun():
    """Pytest fixture that hands the ``conf_add`` function itself to the requesting test."""
    helper = conf_add
    return helper
def create_prices(s0: float, mu: float, sigma: float, num_prices: int) -> list:
    """Generate a reproducible price path following geometric Brownian motion.

    This helper only exists to produce test data. It re-seeds the global
    ``random`` generator with 1234 on every call, so identical arguments always
    give identical prices. Many tests assert exact values produced with this
    seed; if the seed is changed, those tests will fail.

    Parameters:
    ------------
    s0: float
        Initial asset price.

    mu: float
        Interest rate expressed in annual terms.

    sigma: float
        Volatility expressed in annual terms.

    num_prices: int
        Number of prices to generate.

    Returns:
    --------
    A list of ``num_prices`` successive prices, each rounded to 2 decimals.
    """

    random.seed(1234)  # WARNING! Changing the seed will cause most tests to fail

    # One step is 1/365 of a year; both terms below are constant across steps.
    day_fraction = 1.0 / 365.0
    drift = (mu - 0.5 * sigma**2) * day_fraction
    diffusion = sigma * math.sqrt(day_fraction)

    price = s0
    path = []
    for _ in range(num_prices):
        price *= math.exp(drift + diffusion * random.gauss(mu=0, sigma=1))
        # Only the stored value is rounded; the running price keeps full precision.
        path.append(round(price, 2))
    return path
def sample_data_generator(
    frequency: pft.Frequency,
    start_date: datetime.date = datetime.date(2017, 1, 1),
    num: int = 1000,
    skip_weekends: bool = False,
    mu: float = 0.1,
    sigma: float = 0.05,
    eomonth: bool = False,
    dates_as_string: bool = False,
) -> List[tuple]:
    """Build (date, value) pairs for use as time series test data.

    Parameters:
    -----------
    frequency: Frequency
        The frequency of the time series data to be generated.

    start_date: datetime.date
        First date handed to ``pft.create_date_series``.

    num: int
        Number of values to generate; also used to size the date range.

    skip_weekends: bool
        Passed through to ``pft.create_date_series``.
        For daily frequency it also stretches the date range by 7/5.

    mu: float
        Mean return passed to ``create_prices``.

    sigma: float
        Standard deviation passed to ``create_prices``.

    eomonth: bool
        Passed through to ``pft.create_date_series``.

    dates_as_string: bool
        When True, dates are formatted as ``"%Y-%m-%d"`` strings.

    Returns:
    --------
    A list of (date, value) tuples. Dates and values are paired with ``zip``,
    so the list is as long as the shorter of the two.
    """

    # Daily data without weekends needs 7/5 as many calendar days to span `num` points.
    if frequency == pft.AllFrequencies.D and skip_weekends:
        stretch = 7 / 5
    else:
        stretch = 1
    span = int(frequency.value * num * stretch)
    end_date = start_date + relativedelta(**{frequency.freq_type: span})

    dates = pft.create_date_series(
        start_date,
        end_date,
        frequency.symbol,
        skip_weekends=skip_weekends,
        eomonth=eomonth,
        ensure_coverage=False,
    )
    if dates_as_string:
        dates = [dt.strftime("%Y-%m-%d") for dt in dates]

    prices = create_prices(1000, mu, sigma, num)
    return list(zip(dates, prices))
@pytest.fixture
def create_test_data():
    """Pytest fixture that hands the ``sample_data_generator`` function itself to the requesting test."""
    generator = sample_data_generator
    return generator

View File

@ -1,133 +1,435 @@
import datetime
import random
from typing import Literal, Mapping, Sequence
from typing import Mapping
from fincal.core import AllFrequencies, Frequency, Series, TimeSeriesCore
from fincal.fincal import create_date_series
import pyfacts as pft
import pytest
from pyfacts.utils import PyfactsOptions
class TestFrequency:
def test_creation(self):
D = Frequency('daily', 'days', 1, 1, 'D')
D = pft.Frequency("daily", "days", 1, 1, "D")
assert D.days == 1
assert D.symbol == 'D'
assert D.name == 'daily'
assert D.symbol == "D"
assert D.name == "daily"
assert D.value == 1
assert D.freq_type == 'days'
def create_test_data(
frequency: str,
eomonth: bool,
n: int,
gaps: float,
month_position: Literal["start", "middle", "end"],
date_as_str: bool,
as_outer_type: Literal["dict", "list"] = "list",
as_inner_type: Literal["dict", "list", "tuple"] = "tuple",
) -> Sequence[tuple]:
start_dates = {
"start": datetime.datetime(2016, 1, 1),
"middle": datetime.datetime(2016, 1, 15),
"end": datetime.datetime(2016, 1, 31),
}
end_date = datetime.datetime(2021, 12, 31)
dates = create_date_series(start_dates[month_position], end_date, frequency=frequency, eomonth=eomonth)
dates = dates[:n]
if gaps:
num_gaps = int(len(dates) * gaps)
to_remove = random.sample(dates, num_gaps)
for i in to_remove:
dates.remove(i)
if date_as_str:
dates = [i.strftime("%Y-%m-%d") for i in dates]
values = [random.randint(8000, 90000) / 100 for _ in dates]
data = list(zip(dates, values))
if as_outer_type == "list":
if as_inner_type == "list":
data = [list(i) for i in data]
elif as_inner_type == "dict[1]":
data = [dict((i,)) for i in data]
elif as_inner_type == "dict[2]":
data = [dict(date=i, value=j) for i, j in data]
elif as_outer_type == "dict":
data = dict(data)
return data
assert D.freq_type == "days"
class TestAllFrequencies:
def test_attributes(self):
assert hasattr(AllFrequencies, 'D')
assert hasattr(AllFrequencies, 'M')
assert hasattr(AllFrequencies, 'Q')
assert hasattr(pft.AllFrequencies, "D")
assert hasattr(pft.AllFrequencies, "M")
assert hasattr(pft.AllFrequencies, "Q")
def test_days(self):
assert AllFrequencies.D.days == 1
assert AllFrequencies.M.days == 30
assert AllFrequencies.Q.days == 91
assert pft.AllFrequencies.D.days == 1
assert pft.AllFrequencies.M.days == 30
assert pft.AllFrequencies.Q.days == 91
def test_symbol(self):
assert AllFrequencies.H.symbol == 'H'
assert AllFrequencies.W.symbol == 'W'
assert pft.AllFrequencies.H.symbol == "H"
assert pft.AllFrequencies.W.symbol == "W"
def test_values(self):
assert AllFrequencies.H.value == 6
assert AllFrequencies.Y.value == 1
assert pft.AllFrequencies.H.value == 6
assert pft.AllFrequencies.Y.value == 1
def test_type(self):
assert AllFrequencies.Q.freq_type == 'months'
assert AllFrequencies.W.freq_type == 'days'
assert pft.AllFrequencies.Q.freq_type == "months"
assert pft.AllFrequencies.W.freq_type == "days"
class TestSeries:
def test_creation(self):
series = Series([1, 2, 3, 4, 5, 6, 7], data_type='number')
series = pft.Series([1, 2, 3, 4, 5, 6, 7], dtype="number")
assert series.dtype == float
assert series[2] == 3
dates = create_date_series('2021-01-01', '2021-01-31', frequency='D')
series = Series(dates, data_type='date')
dates = pft.create_date_series("2021-01-01", "2021-01-31", frequency="D")
series = pft.Series(dates, dtype="date")
assert series.dtype == datetime.datetime
class TestTimeSeriesCore:
data = [('2021-01-01', 220), ('2021-02-01', 230), ('2021-03-01', 240)]
data = [("2021-01-01", 220), ("2021-02-01", 230), ("2021-03-01", 240)]
def test_repr_str(self):
ts = TimeSeriesCore(self.data, frequency='M')
assert str(ts) in repr(ts).replace('\t', ' ')
def test_repr_str(self, create_test_data):
ts = pft.TimeSeriesCore(self.data, frequency="M")
assert str(ts) in repr(ts).replace("\t", " ")
data = create_test_data(frequency="D", eomonth=False, n=50, gaps=0, month_position="start", date_as_str=True)
ts = TimeSeriesCore(data, frequency="D")
assert '...' in str(ts)
assert '...' in repr(ts)
data = create_test_data(frequency=pft.AllFrequencies.D, eomonth=False, num=50, dates_as_string=True)
ts = pft.TimeSeriesCore(data, frequency="D")
assert "..." in str(ts)
assert "..." in repr(ts)
def test_creation(self):
ts = TimeSeriesCore(self.data, frequency='M')
assert isinstance(ts, TimeSeriesCore)
ts = pft.TimeSeriesCore(self.data, frequency="M")
assert isinstance(ts, pft.TimeSeriesCore)
assert isinstance(ts, Mapping)
def test_creation_no_freq(self, create_test_data):
data = create_test_data(num=300, frequency=pft.AllFrequencies.D)
ts = pft.TimeSeriesCore(data)
assert ts.frequency == pft.AllFrequencies.D
data = create_test_data(num=300, frequency=pft.AllFrequencies.M)
ts = pft.TimeSeriesCore(data)
assert ts.frequency == pft.AllFrequencies.M
def test_creation_no_freq_missing_data(self, create_test_data):
data = create_test_data(num=300, frequency=pft.AllFrequencies.D)
data = random.sample(data, 182)
ts = pft.TimeSeriesCore(data)
assert ts.frequency == pft.AllFrequencies.D
data = create_test_data(num=300, frequency=pft.AllFrequencies.D)
data = random.sample(data, 175)
with pytest.raises(ValueError):
ts = pft.TimeSeriesCore(data)
data = create_test_data(num=100, frequency=pft.AllFrequencies.W)
data = random.sample(data, 70)
ts = pft.TimeSeriesCore(data)
assert ts.frequency == pft.AllFrequencies.W
data = create_test_data(num=100, frequency=pft.AllFrequencies.W)
data = random.sample(data, 68)
with pytest.raises(ValueError):
pft.TimeSeriesCore(data)
def test_creation_wrong_freq(self, create_test_data):
data = create_test_data(num=100, frequency=pft.AllFrequencies.W)
with pytest.raises(ValueError):
pft.TimeSeriesCore(data, frequency="D")
data = create_test_data(num=100, frequency=pft.AllFrequencies.D)
with pytest.raises(ValueError):
pft.TimeSeriesCore(data, frequency="W")
class TestSlicing:
data = [("2021-01-01", 220), ("2021-02-01", 230), ("2021-03-01", 240)]
def test_getitem(self):
ts = TimeSeriesCore(self.data, frequency='M')
ts = pft.TimeSeriesCore(self.data, frequency="M")
assert ts.dates[0] == datetime.datetime(2021, 1, 1, 0, 0)
assert ts.values[0] == 220
assert ts['2021-01-01'][1] == 220
assert len(ts[ts.dates > '2021-01-01']) == 2
assert ts[ts.dates == '2021-02-01'].iloc[0][1] == 230
assert ts["2021-01-01"][1] == 220
assert len(ts[ts.dates > "2021-01-01"]) == 2
assert ts[ts.dates == "2021-02-01"].iloc[0][1] == 230
assert ts.iloc[2][0] == datetime.datetime(2021, 3, 1)
assert len(ts.iloc[:2]) == 2
with pytest.raises(KeyError):
ts["2021-02-03"]
subset_ts = ts[["2021-01-01", "2021-03-01"]]
assert len(subset_ts) == 2
assert isinstance(subset_ts, pft.TimeSeriesCore)
assert subset_ts.iloc[1][1] == 240
def test_get(self):
ts = pft.TimeSeriesCore(self.data, frequency="M")
assert ts.dates[0] == datetime.datetime(2021, 1, 1, 0, 0)
assert ts.values[0] == 220
assert ts.get("2021-01-01")[1] == 220
assert ts.get("2021-02-15") is None
assert ts.get("2021-02-23", -1) == -1
assert ts.get("2021-02-10", closest="previous")[1] == 230
assert ts.get("2021-02-10", closest="next")[1] == 240
PyfactsOptions.get_closest = "previous"
assert ts.get("2021-02-10")[1] == 230
PyfactsOptions.get_closest = "next"
assert ts.get("2021-02-10")[1] == 240
def test_contains(self):
ts = TimeSeriesCore(self.data, frequency='M')
ts = pft.TimeSeriesCore(self.data, frequency="M")
assert datetime.datetime(2021, 1, 1) in ts
assert '2021-01-01' in ts
assert '2021-01-14' not in ts
assert "2021-01-01" in ts
assert "2021-01-14" not in ts
def test_items(self):
ts = TimeSeriesCore(self.data, frequency='M')
ts = pft.TimeSeriesCore(self.data, frequency="M")
for i, j in ts.items():
assert j == self.data[0][1]
break
def test_special_keys(self):
ts = pft.TimeSeriesCore(self.data, frequency="M")
dates = ts["dates"]
values = ts["values"]
assert isinstance(dates, pft.Series)
assert isinstance(values, pft.Series)
assert len(dates) == 3
assert len(values) == 3
assert dates[0] == datetime.datetime(2021, 1, 1, 0, 0)
assert values[0] == 220
def test_iloc_slicing(self):
ts = pft.TimeSeriesCore(self.data, frequency="M")
assert ts.iloc[0] == (datetime.datetime(2021, 1, 1), 220)
assert ts.iloc[-1] == (datetime.datetime(2021, 3, 1), 240)
ts_slice = ts.iloc[0:2]
assert isinstance(ts_slice, pft.TimeSeriesCore)
assert len(ts_slice) == 2
class TestComparativeSlicing:
    """Slicing rolling returns with a boolean mask on the values."""

    def test_date_gt_daily(self, create_test_data):
        """Daily data: the masked subset is a TimeSeriesCore with daily frequency."""
        daily = pft.TimeSeries(create_test_data(num=300, frequency=pft.AllFrequencies.D), "D")
        rolling = daily.calculate_rolling_returns(return_period_unit="months")
        assert len(rolling) == 269

        below_threshold = rolling[rolling.values < 0.1]
        assert isinstance(below_threshold, pft.TimeSeriesCore)
        assert below_threshold.frequency == pft.AllFrequencies.D

    def test_date_gt_monthly(self, create_test_data):
        """Monthly data: the masked subset is a TimeSeriesCore with monthly frequency."""
        monthly = pft.TimeSeries(create_test_data(num=60, frequency=pft.AllFrequencies.M), "M")
        rolling = monthly.calculate_rolling_returns(return_period_unit="months")
        assert len(rolling) == 59

        below_threshold = rolling[rolling.values < 0.1]
        assert isinstance(below_threshold, pft.TimeSeriesCore)
        assert below_threshold.frequency == pft.AllFrequencies.M
class TestSetitem:
    """Item assignment on TimeSeriesCore: inserting, overwriting and rejected inputs."""

    data = [("2021-01-01", 220), ("2021-01-04", 230), ("2021-03-07", 240)]

    def test_setitem(self):
        """A new date grows the series; assigning to it again overwrites in place."""
        series = pft.TimeSeriesCore(self.data, frequency="M")
        assert len(series) == 3

        key = "2021-01-02"
        # First pass inserts the date, second pass replaces its value.
        for new_value in (225, 227.6):
            series[key] = new_value
            assert len(series) == 4
            assert series[key][1] == new_value

    def test_errors(self):
        """Invalid values, iloc assignment and unparseable keys raise."""
        series = pft.TimeSeriesCore(self.data, frequency="M")

        with pytest.raises(TypeError):
            series["2021-01-03"] = "abc"

        with pytest.raises(NotImplementedError):
            series.iloc[4] = 4

        with pytest.raises(ValueError):
            series["abc"] = 12
class TestTimeSeriesCoreHeadTail:
    """head() and tail() on a twelve-point monthly series."""

    # First of each month in 2021, values 220, 230, ..., 330.
    data = [(f"2021-{month:02d}-01", 210 + 10 * month) for month in range(1, 13)]

    def test_head(self):
        """head() returns 6 rows when called without an argument and honours an explicit count."""
        series = pft.TimeSeriesCore(self.data, frequency="M")
        assert len(series.head()) == 6
        assert len(series.head(3)) == 3
        assert isinstance(series.head(), pft.TimeSeriesCore)

        first_six = series.head(6)
        assert first_six.iloc[-1][1] == 270

    def test_tail(self):
        """tail() returns 6 rows when called without an argument and honours an explicit count."""
        series = pft.TimeSeriesCore(self.data, frequency="M")
        assert len(series.tail()) == 6
        assert len(series.tail(8)) == 8
        assert isinstance(series.tail(), pft.TimeSeriesCore)

        last_six = series.tail(6)
        assert last_six.iloc[0][1] == 280

    def test_head_tail(self):
        """Chaining head(8).tail(2) leaves the 7th and 8th points."""
        series = pft.TimeSeriesCore(self.data, frequency="M")
        window = series.head(8).tail(2)
        assert isinstance(window, pft.TimeSeriesCore)
        assert "2021-07-01" in window
        assert window.iloc[1][1] == 290
class TestDelitem:
    """Deleting entries from a TimeSeriesCore by date key."""

    # First of each month Jan-Apr 2021, values 220, 230, 240, 250.
    data = [(f"2021-{month:02d}-01", 210 + 10 * month) for month in range(1, 5)]

    def test_deletion(self):
        """Deleting removes the date; deleting it a second time raises KeyError."""
        series = pft.TimeSeriesCore(self.data, "M")
        removed_key = "2021-03-01"
        assert len(series) == 4

        del series[removed_key]
        assert len(series) == 3
        assert removed_key not in series

        with pytest.raises(KeyError):
            del series[removed_key]
class TestTimeSeriesComparisons:
    """Comparison operators between a TimeSeriesCore and numbers, Series or another TimeSeriesCore."""

    data1 = [("2021-01-01", 220), ("2021-02-01", 230), ("2021-03-01", 240), ("2021-04-01", 250)]
    data2 = [("2021-01-01", 240), ("2021-02-01", 210), ("2021-03-01", 240), ("2021-04-01", 270)]

    @staticmethod
    def _mask(*flags):
        """Build the float Series of 0.0/1.0 flags that a comparison is expected to yield."""
        return pft.Series(list(flags), "float")

    def test_number_comparison(self):
        """Comparing against a scalar yields a TimeSeriesCore of 0.0/1.0 flags."""
        left = pft.TimeSeriesCore(self.data1, "M")
        assert isinstance(left > 23, pft.TimeSeriesCore)
        assert (left > 230).values == self._mask(0.0, 0.0, 1.0, 1.0)
        assert (left >= 230).values == self._mask(0.0, 1.0, 1.0, 1.0)
        assert (left < 240).values == self._mask(1.0, 1.0, 0.0, 0.0)
        assert (left <= 240).values == self._mask(1.0, 1.0, 1.0, 0.0)
        assert (left == 240).values == self._mask(0.0, 0.0, 1.0, 0.0)
        assert (left != 240).values == self._mask(1.0, 1.0, 0.0, 1.0)

    def test_series_comparison(self):
        """Comparing against an equally long Series works element by element."""
        left = pft.TimeSeriesCore(self.data1, "M")
        right = pft.Series([240, 210, 240, 270], dtype="int")

        assert (left > right).values == self._mask(0.0, 1.0, 0.0, 0.0)
        assert (left >= right).values == self._mask(0.0, 1.0, 1.0, 0.0)
        assert (left < right).values == self._mask(1.0, 0.0, 0.0, 1.0)
        assert (left <= right).values == self._mask(1.0, 0.0, 1.0, 1.0)
        assert (left == right).values == self._mask(0.0, 0.0, 1.0, 0.0)
        assert (left != right).values == self._mask(1.0, 1.0, 0.0, 1.0)

    def test_tsc_comparison(self):
        """Comparing two TimeSeriesCore objects with the same dates works element by element."""
        left = pft.TimeSeriesCore(self.data1, "M")
        right = pft.TimeSeriesCore(self.data2, "M")

        assert (left > right).values == self._mask(0.0, 1.0, 0.0, 0.0)
        assert (left >= right).values == self._mask(0.0, 1.0, 1.0, 0.0)
        assert (left < right).values == self._mask(1.0, 0.0, 0.0, 1.0)
        assert (left <= right).values == self._mask(1.0, 0.0, 1.0, 1.0)
        assert (left == right).values == self._mask(0.0, 0.0, 1.0, 0.0)
        assert (left != right).values == self._mask(1.0, 1.0, 0.0, 1.0)

    def test_errors(self):
        """Unsupported operand types raise TypeError; length or date mismatches raise ValueError."""
        full = pft.TimeSeriesCore(self.data1, "M")
        shortened = pft.TimeSeriesCore(self.data2, "M")
        short_numbers = pft.Series([240, 210, 240], dtype="int")
        date_series = pft.Series(["2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01"], dtype="date")

        # Drop one date so the two time series no longer line up.
        del shortened["2021-04-01"]

        with pytest.raises(TypeError):
            full == "a"

        with pytest.raises(ValueError):
            full > shortened

        with pytest.raises(TypeError):
            full == date_series

        with pytest.raises(ValueError):
            full <= short_numbers

        with pytest.raises(TypeError):
            shortened < [23, 24, 25, 26]
class TestTimeSeriesArithmatic:
    """Arithmetic operators between a TimeSeriesCore and numbers, Series or another TimeSeriesCore."""

    data = [("2021-01-01", 220), ("2021-02-01", 230), ("2021-03-01", 240), ("2021-04-01", 250)]

    @staticmethod
    def _value_on(series, date):
        """Return the value stored in ``series`` for ``date``."""
        return series[date][1]

    def test_add(self):
        """Addition with a number (both operand orders), a Series and a TimeSeriesCore."""
        base = pft.TimeSeriesCore(self.data, "M")
        own_values = base.values

        plus_forty = base + 40
        assert self._value_on(plus_forty, "2021-01-01") == 260
        assert self._value_on(plus_forty, "2021-04-01") == 290

        forty_plus = 40 + base
        assert self._value_on(forty_plus, "2021-01-01") == 260
        assert self._value_on(forty_plus, "2021-04-01") == 290

        plus_series = base + own_values
        assert self._value_on(plus_series, "2021-01-01") == 440
        assert self._value_on(plus_series, "2021-04-01") == 500

        plus_ts = base + plus_forty
        assert self._value_on(plus_ts, "2021-01-01") == 480
        assert self._value_on(plus_ts, "2021-04-01") == 540

    def test_sub(self):
        """Subtraction with a number (both operand orders), a Series and a TimeSeriesCore."""
        base = pft.TimeSeriesCore(self.data, "M")
        offsets = pft.Series([20, 30, 40, 50], "number")

        minus_forty = base - 40
        assert self._value_on(minus_forty, "2021-01-01") == 180
        assert self._value_on(minus_forty, "2021-04-01") == 210

        from_240 = 240 - base
        assert self._value_on(from_240, "2021-01-01") == 20
        assert self._value_on(from_240, "2021-04-01") == -10

        minus_series = base - offsets
        assert self._value_on(minus_series, "2021-01-01") == 200
        assert self._value_on(minus_series, "2021-04-01") == 200

        minus_ts = base - minus_forty
        assert self._value_on(minus_ts, "2021-01-01") == 40
        assert self._value_on(minus_ts, "2021-04-01") == 40

    def test_truediv(self):
        """True division with a number (both operand orders), a Series and a TimeSeriesCore."""
        base = pft.TimeSeriesCore(self.data, "M")
        divisors = pft.Series([22, 23, 24, 25], "number")

        tenth = base / 10
        assert self._value_on(tenth, "2021-01-01") == 22
        assert self._value_on(tenth, "2021-04-01") == 25

        thousand_over = 1000 / base
        assert self._value_on(thousand_over, "2021-04-01") == 4

        over_series = base / divisors
        assert self._value_on(over_series, "2021-01-01") == 10
        assert self._value_on(over_series, "2021-04-01") == 10

        over_ts = base / tenth
        assert self._value_on(over_ts, "2021-01-01") == 10
        assert self._value_on(over_ts, "2021-04-01") == 10

    def test_floordiv(self):
        """Floor division with a number (both operand orders) and a Series."""
        base = pft.TimeSeriesCore(self.data, "M")
        divisors = pft.Series([22, 23, 24, 25], "number")

        by_eleven = base // 11
        assert self._value_on(by_eleven, "2021-02-01") == 20
        assert self._value_on(by_eleven, "2021-04-01") == 22

        thousand_over = 1000 // base
        assert self._value_on(thousand_over, "2021-01-01") == 4

        over_series = base // divisors
        assert self._value_on(over_series, "2021-01-01") == 10
        assert self._value_on(over_series, "2021-04-01") == 10

View File

@ -1,310 +0,0 @@
import datetime
import os
import random
from typing import Literal, Sequence
import pytest
from fincal.core import Frequency, Series
from fincal.exceptions import DateNotFoundError
from fincal.fincal import TimeSeries, create_date_series
from fincal.utils import FincalOptions
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sample_data_path = os.path.join(THIS_DIR, "data")
def create_random_test_data(
    frequency: str,
    eomonth: bool,
    n: int,
    gaps: float,
    month_position: Literal["start", "middle", "end"],
    date_as_str: bool,
    as_outer_type: Literal["dict", "list"] = "list",
    as_inner_type: Literal["dict", "dict[1]", "dict[2]", "list", "tuple"] = "tuple",
) -> Sequence[tuple]:
    """Create random date/value test data in a choice of container shapes.

    Dates come from ``create_date_series`` between a start date in January 2016
    and 2021-12-31, truncated to the first ``n``. Values are drawn with
    ``random.randint(8000, 90000) / 100``. This function does not seed the
    random generator, so the output differs between runs unless the caller
    seeds it.

    Parameters
    ----------
    frequency: str
        Frequency symbol passed to ``create_date_series``.

    eomonth: bool
        Passed through to ``create_date_series``.

    n: int
        Maximum number of dates kept before gaps are removed.

    gaps: float
        Fraction of the dates to remove at random; a falsy value removes none.

    month_position: "start" | "middle" | "end"
        Picks the start date: 2016-01-01, 2016-01-15 or 2016-01-31.

    date_as_str: bool
        When True, dates are formatted as ``"%Y-%m-%d"`` strings.

    as_outer_type: "list" | "dict"
        ``"dict"`` returns a single ``{date: value}`` mapping and ignores
        ``as_inner_type``.

    as_inner_type: "tuple" | "list" | "dict[1]" | "dict[2]" | "dict"
        Shape of each element when ``as_outer_type`` is ``"list"``:
        ``"list"`` gives ``[date, value]``, ``"dict[1]"`` gives
        ``{date: value}``, ``"dict[2]"`` gives
        ``{"date": date, "value": value}``. Any other value, including the
        previously annotated ``"dict"``, leaves the ``(date, value)`` tuples
        unchanged.

    Returns
    -------
    A list of tuples, lists or dicts, or a single dict, as selected above.
    The ``Sequence[tuple]`` annotation only describes the default shape.
    """

    start_dates = {
        "start": datetime.datetime(2016, 1, 1),
        "middle": datetime.datetime(2016, 1, 15),
        "end": datetime.datetime(2016, 1, 31),
    }
    end_date = datetime.datetime(2021, 12, 31)
    dates = create_date_series(start_dates[month_position], end_date, frequency=frequency, eomonth=eomonth)
    dates = dates[:n]

    if gaps:
        num_gaps = int(len(dates) * gaps)
        # Removal is kept in place via .remove(): the concrete type returned by
        # create_date_series is not visible here -- TODO confirm before
        # replacing this with a set-based filter.
        for unwanted in random.sample(dates, num_gaps):
            dates.remove(unwanted)

    if date_as_str:
        dates = [i.strftime("%Y-%m-%d") for i in dates]

    values = [random.randint(8000, 90000) / 100 for _ in dates]

    data = list(zip(dates, values))
    if as_outer_type == "list":
        if as_inner_type == "list":
            data = [list(i) for i in data]
        elif as_inner_type == "dict[1]":
            data = [dict((i,)) for i in data]
        elif as_inner_type == "dict[2]":
            data = [dict(date=i, value=j) for i, j in data]
    elif as_outer_type == "dict":
        data = dict(data)

    return data
def create_organised_test_data() -> dict:
    """Build a fixed date -> value mapping that is identical on every run.

    Starting from 2018-01-01 and 1000, 999 points are produced. The date
    advances by 3 days on every 5th step and by 1 day otherwise. The value
    moves by ``step % 7``: down when that remainder is odd, up when it is even.
    """
    current_date = datetime.datetime(2018, 1, 1)
    current_value = 1000
    organised = {}
    for step in range(1, 1000):
        current_date += datetime.timedelta(days=1 if step % 5 else 3)

        remainder = step % 7
        current_value += -remainder if remainder % 2 else remainder

        # Dates strictly increase, so every step adds a new key.
        organised[current_date] = current_value
    return organised
class TestDateSeries:
    """create_date_series for daily, monthly and quarterly frequencies."""

    def test_daily(self):
        """Daily series cover every day of the year; eomonth is rejected for daily data."""
        dt = datetime.datetime

        # 2020 is a leap year, 2017 is not.
        for year, expected_days in ((2020, 366), (2017, 365)):
            start_date = dt(year, 1, 1)
            end_date = dt(year, 12, 31)
            series = create_date_series(start_date, end_date, frequency="D")
            assert len(series) == expected_days

        # Still using the 2017 range from the last iteration.
        with pytest.raises(ValueError):
            create_date_series(start_date, end_date, frequency="D", eomonth=True)

    def test_monthly(self):
        """Monthly series for start dates on the 1st, the 31st and Feb 29."""
        dt = datetime.datetime
        year_end = dt(2020, 12, 31)

        from_first = dt(2020, 1, 1)
        series = create_date_series(from_first, year_end, frequency="M")
        assert len(series) == 12

        series = create_date_series(from_first, year_end, frequency="M", eomonth=True)
        assert dt(2020, 2, 29) in series

        from_month_end = dt(2020, 1, 31)
        series = create_date_series(from_month_end, year_end, frequency="M")
        assert dt(2020, 2, 29) in series
        assert dt(2020, 8, 31) in series
        assert dt(2020, 10, 30) not in series

        from_leap_day = dt(2020, 2, 29)
        series = create_date_series(from_leap_day, year_end, frequency="M")
        assert len(series) == 11
        assert dt(2020, 2, 29) in series
        assert dt(2020, 8, 31) not in series
        assert dt(2020, 10, 29) in series

    def test_quarterly(self):
        """Quarterly series for start dates on the 1st, the 31st and Feb 29."""
        dt = datetime.datetime
        range_end = dt(2020, 12, 31)

        from_first = dt(2018, 1, 1)
        series = create_date_series(from_first, range_end, frequency="Q")
        assert len(series) == 12

        series = create_date_series(from_first, range_end, frequency="Q", eomonth=True)
        assert dt(2020, 4, 30) in series

        from_month_end = dt(2020, 1, 31)
        series = create_date_series(from_month_end, range_end, frequency="Q")
        assert len(series) == 4
        assert dt(2020, 2, 29) not in series
        assert max(series) == dt(2020, 10, 31)

        from_leap_day = dt(2020, 2, 29)
        series = create_date_series(from_leap_day, range_end, frequency="Q")
        assert dt(2020, 2, 29) in series
        assert dt(2020, 8, 31) not in series
        assert dt(2020, 11, 29) in series

        series = create_date_series(from_leap_day, range_end, frequency="Q", eomonth=True)
        assert dt(2020, 11, 30) in series
class TestFincalBasic:
    """Basic TimeSeries behaviour: creation, filling gaps and slicing."""

    @staticmethod
    def _daily_data(n, gaps):
        """Random daily test data with string dates, starting at the beginning of the month."""
        return create_random_test_data(
            frequency="D", eomonth=False, n=n, gaps=gaps, month_position="start", date_as_str=True
        )

    def test_creation(self):
        """Length and frequency are set on creation; gaps shrink the series."""
        series = TimeSeries(self._daily_data(50, 0), frequency="D")
        assert len(series) == 50
        assert isinstance(series.frequency, Frequency)
        assert series.frequency.days == 1

        filled = series.ffill()
        assert len(filled) == 50

        # 10% of 500 dates removed leaves 450.
        series = TimeSeries(self._daily_data(500, 0.1), frequency="D")
        assert len(series) == 450

    def test_fill(self):
        """ffill/bfill close gaps, return None when inplace, and pick the right neighbour."""
        series = TimeSeries(self._daily_data(500, 0.1), frequency="D")
        filled = series.ffill()
        assert len(filled) >= 498

        filled = series.ffill(inplace=True)
        assert filled is None
        assert len(series) >= 498

        series = TimeSeries(self._daily_data(500, 0.1), frequency="D")
        filled = series.bfill()
        assert len(filled) >= 498

        filled = series.bfill(inplace=True)
        assert filled is None
        assert len(series) >= 498

        sparse = TimeSeries([("2021-01-01", 220), ("2021-01-02", 230), ("2021-03-04", 240)], frequency="D")
        forward = sparse.ffill()
        assert forward["2021-01-03"][1] == 230

        backward = sparse.bfill()
        assert backward["2021-01-03"][1] == 240

    def test_iloc_slicing(self):
        """iloc returns a tuple for an index and a TimeSeries for a slice."""
        series = TimeSeries(self._daily_data(50, 0), frequency="D")
        assert series.iloc[0] is not None
        assert series.iloc[:3] is not None
        assert series.iloc[5:7] is not None
        assert isinstance(series.iloc[0], tuple)
        assert isinstance(series.iloc[10:20], TimeSeries)
        assert len(series.iloc[10:20]) == 10

    def test_key_slicing(self):
        """Lookup by date and by the special "dates"/"values" keys."""
        series = TimeSeries(self._daily_data(50, 0), frequency="D")
        known_date = series.iloc[5][0]
        assert series[known_date] is not None
        assert isinstance(series["dates"], Series)
        assert isinstance(series["values"], Series)
        assert len(series.dates) == 50
        assert len(series.values) == 50
class TestReturns:
    """Return calculations on a small hand-written monthly series."""

    # Monthly values rising by 2 each month from 10 (2020-01-01) to 34 (2021-01-01).
    data = [
        ("2020-01-01", 10),
        ("2020-02-01", 12),
        ("2020-03-01", 14),
        ("2020-04-01", 16),
        ("2020-05-01", 18),
        ("2020-06-01", 20),
        ("2020-07-01", 22),
        ("2020-08-01", 24),
        ("2020-09-01", 26),
        ("2020-10-01", 28),
        ("2020-11-01", 30),
        ("2020-12-01", 32),
        ("2021-01-01", 34),
    ]

    def test_returns_calc(self):
        """Check absolute and annualised returns over year, month and day periods."""
        ts = TimeSeries(self.data, frequency="M")
        returns = ts.calculate_returns(
            "2021-01-01", annual_compounded_returns=False, return_period_unit="years", return_period_value=1
        )
        assert returns[1] == 2.4

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=False, return_period_unit="months", return_period_value=3
        )
        assert round(returns[1], 4) == 0.6

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=True, return_period_unit="months", return_period_value=3
        )
        assert round(returns[1], 4) == 5.5536

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=False, return_period_unit="days", return_period_value=90
        )
        assert round(returns[1], 4) == 0.6

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
        )
        assert round(returns[1], 4) == 5.727

        # 2020-04-10 is not in the data; the expected value equals the 2020-04-01 result.
        returns = ts.calculate_returns(
            "2020-04-10", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
        )
        assert round(returns[1], 4) == 5.727

        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-10", return_period_unit="days", return_period_value=90, as_on_match="exact")
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-10", return_period_unit="days", return_period_value=90, prior_match="exact")

    def test_date_formats(self):
        """Check that string dates are parsed with the global or per-call date format."""
        ts = TimeSeries(self.data, frequency="M")
        saved_format = FincalOptions.date_format
        try:
            FincalOptions.date_format = "%d-%m-%Y"
            with pytest.raises(ValueError):
                ts.calculate_returns(
                    "2020-04-10", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
                )

            returns1 = ts.calculate_returns(
                "2020-04-10", return_period_unit="days", return_period_value=90, date_format="%Y-%m-%d"
            )
            returns2 = ts.calculate_returns("10-04-2020", return_period_unit="days", return_period_value=90)
            assert round(returns1[1], 4) == round(returns2[1], 4) == 5.727

            FincalOptions.date_format = "%m-%d-%Y"
            with pytest.raises(ValueError):
                ts.calculate_returns(
                    "2020-04-10", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
                )

            returns1 = ts.calculate_returns(
                "2020-04-10", return_period_unit="days", return_period_value=90, date_format="%Y-%m-%d"
            )
            returns2 = ts.calculate_returns("04-10-2020", return_period_unit="days", return_period_value=90)
            assert round(returns1[1], 4) == round(returns2[1], 4) == 5.727
        finally:
            # The option is global: put it back even if an assertion fails, so
            # tests that run afterwards do not inherit a non-default format.
            FincalOptions.date_format = saved_format

    def test_limits(self):
        """Check that ``closest_max_days`` bounds the search for a nearby date."""
        ts = TimeSeries(self.data, frequency="M")
        FincalOptions.date_format = "%Y-%m-%d"
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-25", return_period_unit="days", return_period_value=90, closest_max_days=10)
class TestVolatility:
    """Volatility checks against the data from ``create_organised_test_data``."""

    # Evaluated once when the class body runs; shared by the tests below.
    data = create_organised_test_data()

    def test_volatility_basic(self):
        """Compare the default and the non-annualised volatility with known values."""
        series = TimeSeries(self.data, frequency="D")
        default_sd = series.volatility()
        assert len(series) == 999
        assert round(default_sd, 6) == 0.057391

        plain_sd = series.volatility(annualize_volatility=False)
        assert round(plain_sd, 6) == 0.003004

View File

@ -1,210 +0,0 @@
import datetime
import math
import random
from unittest import skip
import pytest
from dateutil.relativedelta import relativedelta
from fincal.core import AllFrequencies, Frequency
from fincal.exceptions import DateNotFoundError
from fincal.fincal import MaxDrawdown, TimeSeries, create_date_series
from fincal.utils import FincalOptions
def create_prices(s0: float, mu: float, sigma: float, num_prices: int) -> list:
    """Generate prices following a geometric Brownian motion (GBM) process.

    Since this function is used only to generate data for tests, the seed is fixed as 1234.
    Many of the tests rely on exact values generated using this seed.
    If the seed is changed, those tests will fail.

    Parameters:
    ------------
    s0: float
        Asset initial price.

    mu: float
        Interest rate expressed in annual terms.

    sigma: float
        Volatility expressed in annual terms.

    num_prices: int
        Number of prices to generate.

    Returns:
    --------
        Returns a list of values generated using the GBM algorithm,
        each rounded to 2 decimal places.
    """

    # A private generator produces the same sequence as seeding the module-level
    # one, but leaves the global random state of the test session untouched.
    rng = random.Random(1234)  # WARNING! Changing the seed will cause most tests to fail

    # Loop-invariant parts of the daily (1/365 of a year) GBM step.
    step = 1.0 / 365.0
    drift = (mu - 0.5 * sigma**2) * step
    diffusion = sigma * math.sqrt(step)

    all_values = []
    for _ in range(num_prices):
        s0 *= math.exp(drift + diffusion * rng.gauss(mu=0, sigma=1))
        all_values.append(round(s0, 2))

    return all_values
def create_test_timeseries(
    frequency: Frequency, num: int = 1000, skip_weekends: bool = False, mu: float = 0.1, sigma: float = 0.05
) -> TimeSeries:
    """Build a ``TimeSeries`` of generated prices starting on 2017-01-01.

    Parameters:
    -----------
    frequency: Frequency
        Frequency of the series to generate.

    num: int
        Number of prices requested from ``create_prices``.

    skip_weekends: bool
        Passed on to ``create_date_series``; also stretches the date span
        when the frequency is daily.

    mu: float
        Mean return handed to ``create_prices``.

    sigma: float
        Standard deviation handed to ``create_prices``.

    Returns:
    --------
        Returns a TimeSeris object built from the zipped dates and prices
    """

    first_date = datetime.datetime(2017, 1, 1)

    # Daily data without weekends gets a 7/5 longer calendar span
    # (presumably so that about ``num`` weekdays remain - TODO confirm).
    stretch = 7 / 5 if frequency == AllFrequencies.D and skip_weekends else 1
    span = {frequency.freq_type: int(frequency.value * num * stretch)}
    last_date = first_date + relativedelta(**span)

    date_points = create_date_series(first_date, last_date, frequency.symbol, skip_weekends=skip_weekends)
    price_points = create_prices(1000, mu, sigma, num)

    # zip() stops at the shorter of the two sequences.
    return TimeSeries(dict(zip(date_points, price_points)), frequency=frequency.symbol)
class TestReturns:
    """Return calculations on the generated daily series (weekends skipped)."""

    def test_returns_calc(self):
        """Check absolute and annualised returns against known values."""
        ts = create_test_timeseries(AllFrequencies.D, skip_weekends=True)
        returns = ts.calculate_returns(
            "2020-01-01", annual_compounded_returns=False, return_period_unit="years", return_period_value=1
        )
        assert round(returns[1], 6) == 0.112913

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=False, return_period_unit="months", return_period_value=3
        )
        assert round(returns[1], 6) == 0.015908

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=True, return_period_unit="months", return_period_value=3
        )
        assert round(returns[1], 6) == 0.065167

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=False, return_period_unit="days", return_period_value=90
        )
        assert round(returns[1], 6) == 0.017673

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
        )
        assert round(returns[1], 6) == 0.073632

        # With exact matching a missing date must raise instead of falling back.
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-04", return_period_unit="days", return_period_value=90, as_on_match="exact")
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-04", return_period_unit="months", return_period_value=3, prior_match="exact")

    def test_date_formats(self):
        """Check that string dates are parsed with the global or per-call date format."""
        ts = create_test_timeseries(AllFrequencies.D, skip_weekends=True)
        saved_format = FincalOptions.date_format
        try:
            FincalOptions.date_format = "%d-%m-%Y"
            with pytest.raises(ValueError):
                ts.calculate_returns(
                    "2020-04-10", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
                )

            returns1 = ts.calculate_returns(
                "2020-04-01", return_period_unit="days", return_period_value=90, date_format="%Y-%m-%d"
            )
            returns2 = ts.calculate_returns("01-04-2020", return_period_unit="days", return_period_value=90)
            assert round(returns1[1], 6) == round(returns2[1], 6) == 0.073632

            FincalOptions.date_format = "%m-%d-%Y"
            with pytest.raises(ValueError):
                ts.calculate_returns(
                    "2020-04-01", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
                )

            returns1 = ts.calculate_returns(
                "2020-04-01", return_period_unit="days", return_period_value=90, date_format="%Y-%m-%d"
            )
            returns2 = ts.calculate_returns("04-01-2020", return_period_unit="days", return_period_value=90)
            assert round(returns1[1], 6) == round(returns2[1], 6) == 0.073632
        finally:
            # The option is global: put it back even if an assertion fails, so
            # tests that run afterwards do not inherit a non-default format.
            FincalOptions.date_format = saved_format

    def test_limits(self):
        """Check that ``closest_max_days`` bounds the search for a nearby date."""
        FincalOptions.date_format = "%Y-%m-%d"
        ts = create_test_timeseries(AllFrequencies.D)
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-11-25", return_period_unit="days", return_period_value=90, closest_max_days=10)
class TestVolatility:
    """Volatility of the generated daily series under several option sets."""

    def test_daily_ts(self):
        """Run each option set in turn and compare with the recorded value."""
        series = create_test_timeseries(AllFrequencies.D)
        assert len(series) == 1000

        # (keyword arguments, rounding digits, expected value), in call order.
        cases = [
            ({"annualize_volatility": False}, 6, 0.002622),
            ({}, 6, 0.050098),
            ({"annual_compounded_returns": True}, 4, 37.9329),
            ({"return_period_unit": "months", "annual_compounded_returns": True}, 4, 0.6778),
            ({"return_period_unit": "years"}, 6, 0.023164),
            ({"from_date": "2017-10-01", "to_date": "2019-08-31", "annualize_volatility": True}, 6, 0.050559),
            ({"from_date": "2017-02-01", "frequency": "M", "return_period_unit": "months"}, 6, 0.050884),
            (
                {
                    "frequency": "M",
                    "return_period_unit": "months",
                    "return_period_value": 3,
                    "annualize_volatility": False,
                },
                6,
                0.020547,
            ),
        ]
        for options, digits, expected in cases:
            observed = series.volatility(**options)
            assert round(observed, digits) == expected
class TestDrawdown:
    """Maximum drawdown of generated daily and weekly series."""

    # Keys every max_drawdown() result is expected to carry.
    _KEYS = ("start_date", "end_date", "drawdown")

    def test_daily_ts(self):
        """Check the drawdown window and depth for the daily series."""
        series = create_test_timeseries(AllFrequencies.D, skip_weekends=True)
        result = series.max_drawdown()
        assert isinstance(result, dict)
        assert len(result) == 3
        assert all(key in result for key in self._KEYS)

        expected = {
            "start_date": datetime.datetime(2017, 6, 6, 0, 0),
            "end_date": datetime.datetime(2017, 7, 31, 0, 0),
            "drawdown": -0.028293686030751997,
        }
        assert result == expected

    def test_weekly_ts(self):
        """Check the drawdown window and depth for a more volatile weekly series."""
        series = create_test_timeseries(AllFrequencies.W, mu=1, sigma=0.5)
        result = series.max_drawdown()
        assert isinstance(result, dict)
        assert len(result) == 3
        assert all(key in result for key in self._KEYS)

        expected = {
            "start_date": datetime.datetime(2019, 2, 17, 0, 0),
            "end_date": datetime.datetime(2019, 11, 17, 0, 0),
            "drawdown": -0.2584760499552089,
        }
        assert result == expected

588
tests/test_pyfacts.py Normal file
View File

@ -0,0 +1,588 @@
import datetime
import pytest
from pyfacts import (
AllFrequencies,
Frequency,
PyfactsOptions,
TimeSeries,
create_date_series,
)
from pyfacts.exceptions import DateNotFoundError
class TestDateSeries:
    """Checks for ``create_date_series`` at daily, monthly and quarterly frequency."""

    def test_daily(self):
        """A leap year yields 366 dates, a normal year 365; eomonth is rejected."""
        day = datetime.datetime
        first, last = day(2020, 1, 1), day(2020, 12, 31)
        dates = create_date_series(first, last, frequency="D")
        assert len(dates) == 366

        first, last = day(2017, 1, 1), day(2017, 12, 31)
        dates = create_date_series(first, last, frequency="D")
        assert len(dates) == 365

        with pytest.raises(ValueError):
            create_date_series(first, last, frequency="D", eomonth=True)

    def test_monthly(self):
        """Check month-end handling for different start days."""
        day = datetime.datetime
        first, last = day(2020, 1, 1), day(2020, 12, 31)
        dates = create_date_series(first, last, frequency="M", ensure_coverage=False)
        assert len(dates) == 12

        dates = create_date_series(first, last, frequency="M", eomonth=True)
        assert day(2020, 2, 29) in dates

        # Starting on the 31st.
        first = day(2020, 1, 31)
        dates = create_date_series(first, last, frequency="M")
        assert day(2020, 2, 29) in dates
        assert day(2020, 8, 31) in dates
        assert day(2020, 10, 30) not in dates

        # Starting on the 29th.
        first = day(2020, 2, 29)
        dates = create_date_series(first, last, frequency="M")
        assert len(dates) == 11
        assert day(2020, 2, 29) in dates
        assert day(2020, 8, 31) not in dates
        assert day(2020, 10, 29) in dates

    def test_quarterly(self):
        """Check quarterly series, with and without ``eomonth``."""
        day = datetime.datetime
        first, last = day(2018, 1, 1), day(2020, 12, 31)
        dates = create_date_series(first, last, frequency="Q")
        assert len(dates) == 12

        dates = create_date_series(first, last, frequency="Q", eomonth=True)
        assert day(2020, 4, 30) in dates

        first = day(2020, 1, 31)
        dates = create_date_series(first, last, frequency="Q")
        assert len(dates) == 4
        assert day(2020, 2, 29) not in dates
        assert max(dates) == day(2020, 10, 31)

        first = day(2020, 2, 29)
        dates = create_date_series(first, last, frequency="Q")
        assert day(2020, 2, 29) in dates
        assert day(2020, 8, 31) not in dates
        assert day(2020, 11, 29) in dates

        dates = create_date_series(first, last, frequency="Q", eomonth=True)
        assert day(2020, 11, 30) in dates
class TestTimeSeriesCreation:
    """Construction of ``TimeSeries`` from the supported input shapes.

    The ``create_test_data`` fixture supplies ``(date, value)`` pairs; each
    test reshapes them and checks that the first date (2017-01-01) is in
    the resulting series.
    """

    def test_creation_with_list_of_tuples(self, create_test_data):
        """Build from a list of ``(datetime, value)`` tuples."""
        ts_data = create_test_data(frequency=AllFrequencies.D, num=50)
        ts = TimeSeries(ts_data, frequency="D")
        assert len(ts) == 50
        assert isinstance(ts.frequency, Frequency)
        assert ts.frequency.days == 1

    def test_creation_with_string_dates(self, create_test_data):
        """Build from string dates in the default and in explicit formats."""
        ts_data = create_test_data(frequency=AllFrequencies.D, num=50)

        # The membership checks used to be bare expressions and so verified
        # nothing; they are now real assertions.
        ts_data1 = [(dt.strftime("%Y-%m-%d"), val) for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D")
        assert datetime.datetime(2017, 1, 1) in ts

        ts_data1 = [(dt.strftime("%d-%m-%Y"), val) for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D", date_format="%d-%m-%Y")
        assert datetime.datetime(2017, 1, 1) in ts

        ts_data1 = [(dt.strftime("%m-%d-%Y"), val) for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D", date_format="%m-%d-%Y")
        assert datetime.datetime(2017, 1, 1) in ts

        ts_data1 = [(dt.strftime("%m-%d-%Y %H:%M"), val) for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D", date_format="%m-%d-%Y %H:%M")
        assert datetime.datetime(2017, 1, 1, 0, 0) in ts

    def test_creation_with_list_of_dicts(self, create_test_data):
        """Build from a list of ``{"date": ..., "value": ...}`` dicts."""
        ts_data = create_test_data(frequency=AllFrequencies.D, num=50)
        ts_data1 = [{"date": dt.strftime("%Y-%m-%d"), "value": val} for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D")
        assert datetime.datetime(2017, 1, 1) in ts

    def test_creation_with_list_of_lists(self, create_test_data):
        """Build from a list of ``[date, value]`` lists."""
        ts_data = create_test_data(frequency=AllFrequencies.D, num=50)
        ts_data1 = [[dt.strftime("%Y-%m-%d"), val] for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D")
        assert datetime.datetime(2017, 1, 1) in ts

    def test_creation_with_dict(self, create_test_data):
        """Build from a list of single-entry ``{date: value}`` dicts."""
        ts_data = create_test_data(frequency=AllFrequencies.D, num=50)
        ts_data1 = [{dt.strftime("%Y-%m-%d"): val} for dt, val in ts_data]
        ts = TimeSeries(ts_data1, frequency="D")
        assert datetime.datetime(2017, 1, 1) in ts
class TestTimeSeriesBasics:
    """Forward/backward fill behaviour at several frequencies."""

    def test_fill(self, create_test_data):
        """Fill weekend gaps in daily data, as a copy and in place."""
        # NOTE(review): this sets a global option and never restores it;
        # confirm whether later tests rely on the value before changing that.
        PyfactsOptions.get_closest = "exact"
        ts_data = create_test_data(frequency=AllFrequencies.D, num=50, skip_weekends=True)
        ts = TimeSeries(ts_data, frequency="D")
        ffill_data = ts.ffill()
        assert len(ffill_data) == 68

        # The in-place variant returns nothing and mutates ``ts`` itself.
        ffill_data = ts.ffill(inplace=True)
        assert ffill_data is None
        assert len(ts) == 68

        ts_data = create_test_data(frequency=AllFrequencies.D, num=50, skip_weekends=True)
        ts = TimeSeries(ts_data, frequency="D")
        bfill_data = ts.bfill()
        assert len(bfill_data) == 68

        bfill_data = ts.bfill(inplace=True)
        assert bfill_data is None
        assert len(ts) == 68

        # 2021-01-03 is missing: ffill takes the previous value, bfill the next.
        data = [("2021-01-01", 220), ("2021-01-02", 230), ("2021-01-04", 240)]
        ts = TimeSeries(data, frequency="D")
        ff = ts.ffill()
        assert ff["2021-01-03"][1] == 230

        bf = ts.bfill()
        assert bf["2021-01-03"][1] == 240

    def test_fill_weekly(self, create_test_data):
        """Fill two removed points in a weekly series."""
        ts_data = create_test_data(frequency=AllFrequencies.W, num=10)
        ts_data.pop(2)
        ts_data.pop(6)
        ts = TimeSeries(ts_data, frequency="W")
        assert len(ts) == 8

        ff = ts.ffill()
        assert len(ff) == 10
        assert "2017-01-15" in ff
        assert ff["2017-01-15"][1] == ff["2017-01-08"][1]

        bf = ts.bfill()
        # Previously re-checked len(ff), so the bfill length was never verified.
        assert len(bf) == 10
        assert "2017-01-15" in bf
        assert bf["2017-01-15"][1] == bf["2017-01-22"][1]

    def test_fill_monthly(self, create_test_data):
        """Fill two removed points in a monthly (start-of-month) series."""
        ts_data = create_test_data(frequency=AllFrequencies.M, num=10)
        ts_data.pop(2)
        ts_data.pop(6)
        ts = TimeSeries(ts_data, frequency="M")
        assert len(ts) == 8

        ff = ts.ffill()
        assert len(ff) == 10
        assert "2017-03-01" in ff
        assert ff["2017-03-01"][1] == ff["2017-02-01"][1]

        bf = ts.bfill()
        assert len(bf) == 10
        assert "2017-08-01" in bf
        assert bf["2017-08-01"][1] == bf["2017-09-01"][1]

    def test_fill_eomonthly(self, create_test_data):
        """Fill two removed points in a monthly end-of-month series."""
        ts_data = create_test_data(frequency=AllFrequencies.M, num=10, eomonth=True)
        ts_data.pop(2)
        ts_data.pop(6)
        ts = TimeSeries(ts_data, frequency="M")
        assert len(ts) == 8

        ff = ts.ffill()
        assert len(ff) == 10
        assert "2017-03-31" in ff
        assert ff["2017-03-31"][1] == ff["2017-02-28"][1]

        bf = ts.bfill()
        assert len(bf) == 10
        assert "2017-08-31" in bf
        assert bf["2017-08-31"][1] == bf["2017-09-30"][1]

    def test_fill_quarterly(self, create_test_data):
        """Fill two removed points in a quarterly end-of-month series."""
        ts_data = create_test_data(frequency=AllFrequencies.Q, num=10, eomonth=True)
        ts_data.pop(2)
        ts_data.pop(6)
        ts = TimeSeries(ts_data, frequency="Q")
        assert len(ts) == 8

        ff = ts.ffill()
        assert len(ff) == 10
        assert "2017-07-31" in ff
        assert ff["2017-07-31"][1] == ff["2017-04-30"][1]

        bf = ts.bfill()
        assert len(bf) == 10
        assert "2018-10-31" in bf
        assert bf["2018-10-31"][1] == bf["2019-01-31"][1]
class TestReturns:
    """Return calculations on fixture-generated daily data."""

    def test_returns_calc(self, create_test_data):
        """Check absolute and annualised returns against known values."""
        ts_data = create_test_data(AllFrequencies.D, skip_weekends=True)
        ts = TimeSeries(ts_data, "D")
        returns = ts.calculate_returns(
            "2020-01-01", annual_compounded_returns=False, return_period_unit="years", return_period_value=1
        )
        assert round(returns[1], 6) == 0.112913

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=False, return_period_unit="months", return_period_value=3
        )
        assert round(returns[1], 6) == 0.015908

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=True, return_period_unit="months", return_period_value=3
        )
        assert round(returns[1], 6) == 0.065167

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=False, return_period_unit="days", return_period_value=90
        )
        assert round(returns[1], 6) == 0.017673

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
        )
        assert round(returns[1], 6) == 0.073632

        # With exact matching a missing date must raise instead of falling back.
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-04", return_period_unit="days", return_period_value=90, as_on_match="exact")
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-08", return_period_unit="months", return_period_value=1, prior_match="exact")

    def test_date_formats(self, create_test_data):
        """Check that string dates are parsed with the global or per-call date format."""
        ts_data = create_test_data(AllFrequencies.D, skip_weekends=True)
        ts = TimeSeries(ts_data, "D")
        saved_format = PyfactsOptions.date_format
        try:
            PyfactsOptions.date_format = "%d-%m-%Y"
            with pytest.raises(ValueError):
                ts.calculate_returns(
                    "2020-04-10", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
                )

            returns1 = ts.calculate_returns(
                "2020-04-01", return_period_unit="days", return_period_value=90, date_format="%Y-%m-%d"
            )
            returns2 = ts.calculate_returns("01-04-2020", return_period_unit="days", return_period_value=90)
            assert round(returns1[1], 6) == round(returns2[1], 6) == 0.073632

            PyfactsOptions.date_format = "%m-%d-%Y"
            with pytest.raises(ValueError):
                ts.calculate_returns(
                    "2020-04-01", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
                )

            returns1 = ts.calculate_returns(
                "2020-04-01", return_period_unit="days", return_period_value=90, date_format="%Y-%m-%d"
            )
            returns2 = ts.calculate_returns("04-01-2020", return_period_unit="days", return_period_value=90)
            assert round(returns1[1], 6) == round(returns2[1], 6) == 0.073632
        finally:
            # The option is global: put it back even if an assertion fails, so
            # tests that run afterwards do not inherit a non-default format.
            PyfactsOptions.date_format = saved_format

    def test_limits(self, create_test_data):
        """Check that ``closest_max_days`` bounds the search for a nearby date."""
        PyfactsOptions.date_format = "%Y-%m-%d"
        ts_data = create_test_data(AllFrequencies.D)
        ts = TimeSeries(ts_data, "D")
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-11-25", return_period_unit="days", return_period_value=90, closest_max_days=10)

    def test_rolling_returns(self):
        """Placeholder for rolling-returns tests; currently checks nothing."""
        # To-do
        # Deliberately returns None: pytest warns when a test returns a value.
class TestExpand:
    """Expanding a series to a higher frequency with forward fill."""

    def test_weekly_to_daily(self, create_test_data):
        """Weekly data (10 points) expanded to daily."""
        weekly = TimeSeries(create_test_data(AllFrequencies.W, num=10), "W")
        result = weekly.expand("D", "ffill")
        assert len(result) == 64
        assert result.frequency.name == "daily"
        # Forward fill repeats the first value on the following point.
        assert result.iloc[0][1] == result.iloc[1][1]

    def test_weekly_to_daily_no_weekends(self, create_test_data):
        """Weekly data (10 points) expanded to daily, skipping weekends."""
        weekly = TimeSeries(create_test_data(AllFrequencies.W, num=10), "W")
        result = weekly.expand("D", "ffill", skip_weekends=True)
        assert len(result) == 46
        assert result.frequency.name == "daily"
        assert result.iloc[0][1] == result.iloc[1][1]

    def test_monthly_to_daily(self, create_test_data):
        """Monthly data (6 points) expanded to daily."""
        monthly = TimeSeries(create_test_data(AllFrequencies.M, num=6), "M")
        result = monthly.expand("D", "ffill")
        assert len(result) == 152
        assert result.frequency.name == "daily"
        assert result.iloc[0][1] == result.iloc[1][1]

    def test_monthly_to_daily_no_weekends(self, create_test_data):
        """Monthly data (6 points) expanded to daily, skipping weekends."""
        monthly = TimeSeries(create_test_data(AllFrequencies.M, num=6), "M")
        result = monthly.expand("D", "ffill", skip_weekends=True)
        assert len(result) == 109
        assert result.frequency.name == "daily"
        assert result.iloc[0][1] == result.iloc[1][1]

    def test_monthly_to_weekly(self, create_test_data):
        """Monthly data (6 points) expanded to weekly."""
        monthly = TimeSeries(create_test_data(AllFrequencies.M, num=6), "M")
        result = monthly.expand("W", "ffill")
        assert len(result) == 23
        assert result.frequency.name == "weekly"
        assert result.iloc[0][1] == result.iloc[1][1]

    def test_yearly_to_monthly(self, create_test_data):
        """Yearly data (5 points) expanded to monthly."""
        yearly = TimeSeries(create_test_data(AllFrequencies.Y, num=5), "Y")
        result = yearly.expand("M", "ffill")
        assert len(result) == 49
        assert result.frequency.name == "monthly"
        assert result.iloc[0][1] == result.iloc[1][1]
class TestShrink:
    """Shrinking a series to a lower frequency."""

    def test_daily_to_smaller(self, create_test_data):
        """Daily data (1000 points) shrunk to weekly and to monthly."""
        daily = TimeSeries(create_test_data(AllFrequencies.D, num=1000), "D")
        as_weekly = daily.shrink("W", "ffill")
        as_monthly = daily.shrink("M", "ffill")
        assert len(as_weekly) == 144
        assert len(as_monthly) == 34

    def test_weekly_to_smaller(self, create_test_data):
        """Weekly data (300 points) shrunk to monthly; shrinking to daily is rejected."""
        weekly = TimeSeries(create_test_data(AllFrequencies.W, num=300), "W")

        as_monthly = weekly.shrink("M", "ffill")
        assert len(as_monthly) == 70

        as_month_end = weekly.shrink("M", "ffill", eomonth=True)
        assert len(as_month_end) == 69

        # Daily is a higher frequency than weekly, so this is not a shrink.
        with pytest.raises(ValueError):
            weekly.shrink("D", "ffill")
class TestMeanReturns:
    """Placeholder: no mean-return tests have been written yet."""

    # TODO
class TestReadCsv:
    """Placeholder: no CSV-reading tests have been written yet."""

    # TODO
class TestTransform:
    """Aggregating a series to a lower frequency with ``transform`` (mean)."""

    def test_daily_to_weekly(self, create_test_data):
        """Daily weekday data averaged per week, without coverage padding."""
        daily = TimeSeries(create_test_data(AllFrequencies.D, num=782, skip_weekends=True), "D")
        result = daily.transform("W", "mean", ensure_coverage=False)
        assert isinstance(result, TimeSeries)
        assert len(result) == 157
        assert "2017-01-30" in result
        assert result.iloc[4] == (datetime.datetime(2017, 1, 30), 1020.082)

    def test_daily_to_monthly(self, create_test_data):
        """Daily data averaged per month."""
        daily = TimeSeries(create_test_data(AllFrequencies.D, num=782, skip_weekends=False), "D")
        result = daily.transform("M", "mean")
        assert isinstance(result, TimeSeries)
        assert len(result) == 27
        assert "2018-01-01" in result
        assert round(result.iloc[12][1], 2) == 1146.91

    def test_daily_to_yearly(self, create_test_data):
        """Daily weekday data averaged per year."""
        daily = TimeSeries(create_test_data(AllFrequencies.D, num=782, skip_weekends=True), "D")
        result = daily.transform("Y", "mean")
        assert isinstance(result, TimeSeries)
        assert len(result) == 4
        assert "2019-01-02" in result
        assert result.iloc[2] == (datetime.datetime(2019, 1, 2), 1157.2835632183908)

    def test_weekly_to_monthly(self, create_test_data):
        """Weekly data averaged per month."""
        weekly = TimeSeries(create_test_data(AllFrequencies.W, num=261), "W")
        result = weekly.transform("M", "mean")
        assert isinstance(result, TimeSeries)
        assert "2017-01-01" in result
        assert result.iloc[1] == (datetime.datetime(2017, 2, 1), 1008.405)

    def test_weekly_to_qty(self, create_test_data):
        """Weekly data averaged per quarter."""
        weekly = TimeSeries(create_test_data(AllFrequencies.W, num=261), "W")
        result = weekly.transform("Q", "mean")
        assert len(result) == 21
        assert "2018-01-01" in result
        assert round(result.iloc[4][1], 2) == 1032.01

    def test_weekly_to_yearly(self, create_test_data):
        """Weekly data averaged per year; transforming to daily is rejected."""
        weekly = TimeSeries(create_test_data(AllFrequencies.W, num=261), "W")
        result = weekly.transform("Y", "mean")
        assert "2019-01-01" in result
        assert round(result.iloc[2][1], 2) == 1053.70

        with pytest.raises(ValueError):
            weekly.transform("D", "mean")

    def test_monthly_to_qty(self, create_test_data):
        """Monthly data averaged per quarter; same-frequency transform is rejected."""
        monthly = TimeSeries(create_test_data(AllFrequencies.M, num=36), "M")
        result = monthly.transform("Q", "mean")
        assert len(result) == 13
        assert "2018-10-01" in result
        assert result.iloc[7] == (datetime.datetime(2018, 10, 1), 1022.6466666666666)

        with pytest.raises(ValueError):
            monthly.transform("M", "sum")
class TestReturnsAgain:
    """Return calculations on a small hand-written monthly series."""

    # Monthly values rising by 2 each month from 10 (2020-01-01) to 34 (2021-01-01).
    data = [
        ("2020-01-01", 10),
        ("2020-02-01", 12),
        ("2020-03-01", 14),
        ("2020-04-01", 16),
        ("2020-05-01", 18),
        ("2020-06-01", 20),
        ("2020-07-01", 22),
        ("2020-08-01", 24),
        ("2020-09-01", 26),
        ("2020-10-01", 28),
        ("2020-11-01", 30),
        ("2020-12-01", 32),
        ("2021-01-01", 34),
    ]

    def test_returns_calc(self):
        """Check absolute and annualised returns over year, month and day periods."""
        ts = TimeSeries(self.data, frequency="M")
        returns = ts.calculate_returns(
            "2021-01-01", annual_compounded_returns=False, return_period_unit="years", return_period_value=1
        )
        assert returns[1] == 2.4

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=False, return_period_unit="months", return_period_value=3
        )
        assert round(returns[1], 4) == 0.6

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=True, return_period_unit="months", return_period_value=3
        )
        assert round(returns[1], 4) == 5.5536

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=False, return_period_unit="days", return_period_value=90
        )
        assert round(returns[1], 4) == 0.6

        returns = ts.calculate_returns(
            "2020-04-01", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
        )
        assert round(returns[1], 4) == 5.727

        # 2020-04-10 is not in the data; the expected value equals the 2020-04-01 result.
        returns = ts.calculate_returns(
            "2020-04-10", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
        )
        assert round(returns[1], 4) == 5.727

        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-10", return_period_unit="days", return_period_value=90, as_on_match="exact")
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-10", return_period_unit="days", return_period_value=90, prior_match="exact")

    def test_date_formats(self):
        """Check that string dates are parsed with the global or per-call date format."""
        ts = TimeSeries(self.data, frequency="M")
        saved_format = PyfactsOptions.date_format
        try:
            PyfactsOptions.date_format = "%d-%m-%Y"
            with pytest.raises(ValueError):
                ts.calculate_returns(
                    "2020-04-10", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
                )

            returns1 = ts.calculate_returns(
                "2020-04-10", return_period_unit="days", return_period_value=90, date_format="%Y-%m-%d"
            )
            returns2 = ts.calculate_returns("10-04-2020", return_period_unit="days", return_period_value=90)
            assert round(returns1[1], 4) == round(returns2[1], 4) == 5.727

            PyfactsOptions.date_format = "%m-%d-%Y"
            with pytest.raises(ValueError):
                ts.calculate_returns(
                    "2020-04-10", annual_compounded_returns=True, return_period_unit="days", return_period_value=90
                )

            returns1 = ts.calculate_returns(
                "2020-04-10", return_period_unit="days", return_period_value=90, date_format="%Y-%m-%d"
            )
            returns2 = ts.calculate_returns("04-10-2020", return_period_unit="days", return_period_value=90)
            assert round(returns1[1], 4) == round(returns2[1], 4) == 5.727
        finally:
            # The option is global: put it back even if an assertion fails, so
            # tests that run afterwards do not inherit a non-default format.
            PyfactsOptions.date_format = saved_format

    def test_limits(self):
        """Check that ``closest_max_days`` bounds the search for a nearby date."""
        ts = TimeSeries(self.data, frequency="M")
        PyfactsOptions.date_format = "%Y-%m-%d"
        with pytest.raises(DateNotFoundError):
            ts.calculate_returns("2020-04-25", return_period_unit="days", return_period_value=90, closest_max_days=10)
class TestVolatility:
    """Volatility of fixture-generated daily data under several option sets."""

    def test_daily_ts(self, create_test_data):
        """Run each option set in turn and compare with the recorded value."""
        series = TimeSeries(create_test_data(AllFrequencies.D), "D")
        assert len(series) == 1000

        # (keyword arguments, rounding digits, expected value), in call order.
        cases = [
            ({"annualize_volatility": False}, 6, 0.002622),
            ({}, 6, 0.050098),
            ({"annual_compounded_returns": True}, 4, 37.9329),
            ({"return_period_unit": "months", "annual_compounded_returns": True}, 4, 0.6778),
            ({"return_period_unit": "years"}, 6, 0.023164),
            ({"from_date": "2017-10-01", "to_date": "2019-08-31", "annualize_volatility": True}, 6, 0.050559),
            ({"from_date": "2017-02-01", "frequency": "M", "return_period_unit": "months"}, 6, 0.050884),
            (
                {
                    "frequency": "M",
                    "return_period_unit": "months",
                    "return_period_value": 3,
                    "annualize_volatility": False,
                },
                6,
                0.020547,
            ),
        ]
        for options, digits, expected in cases:
            observed = series.volatility(**options)
            assert round(observed, digits) == expected
class TestDrawdown:
    """Maximum drawdown of fixture-generated daily and weekly data."""

    # Keys every max_drawdown() result is expected to carry.
    _KEYS = ("start_date", "end_date", "drawdown")

    def test_daily_ts(self, create_test_data):
        """Check the drawdown window and depth for the daily series."""
        series = TimeSeries(create_test_data(AllFrequencies.D, skip_weekends=True), "D")
        result = series.max_drawdown()
        assert isinstance(result, dict)
        assert len(result) == 3
        assert all(key in result for key in self._KEYS)

        expected = {
            "start_date": datetime.datetime(2017, 6, 6, 0, 0),
            "end_date": datetime.datetime(2017, 7, 31, 0, 0),
            "drawdown": -0.028293686030751997,
        }
        assert result == expected

    def test_weekly_ts(self, create_test_data):
        """Check the drawdown window and depth for a more volatile weekly series."""
        series = TimeSeries(create_test_data(AllFrequencies.W, mu=1, sigma=0.5), "W")
        result = series.max_drawdown()
        assert isinstance(result, dict)
        assert len(result) == 3
        assert all(key in result for key in self._KEYS)

        expected = {
            "start_date": datetime.datetime(2019, 2, 17, 0, 0),
            "end_date": datetime.datetime(2019, 11, 17, 0, 0),
            "drawdown": -0.2584760499552089,
        }
        assert result == expected
class TestSync:
    """Aligning one series to the dates of another with ``sync``."""

    def test_weekly_to_daily(self, create_test_data):
        """Sync a 3-point weekly series onto a 15-point daily series."""
        daily = TimeSeries(create_test_data(AllFrequencies.D, num=15), frequency="D")
        weekly = TimeSeries(create_test_data(AllFrequencies.W, num=3), frequency="W")

        synced = daily.sync(weekly)

        assert len(daily) == len(synced)
        assert synced.frequency == AllFrequencies.D
        assert "2017-01-02" in synced
        # The day after the first weekly point carries that point's value.
        assert synced["2017-01-02"][1] == synced["2017-01-01"][1]

172
tests/test_stats.py Normal file
View File

@ -0,0 +1,172 @@
import pyfacts as pft
def test_conf(conf_fun):
    """Check that the object supplied by the ``conf_fun`` fixture returns 6 for (2, 4)."""
    add = conf_fun
    total = add(2, 4)
    assert total == 6
class TestSharpe:
    """Sharpe ratio of fixture-generated data for several return periods."""

    def test_sharpe_daily_freq(self, create_test_data):
        """Daily weekday data, risk-free rate 0.06, ending 2021-12-31."""
        rows = create_test_data(num=1305, frequency=pft.AllFrequencies.D, skip_weekends=True)
        series = pft.TimeSeries(rows, "D")

        # (from_date, return_period_unit, return_period_value, expected ratio)
        cases = [
            ("2017-02-04", "months", 1, 1.0502),
            ("2017-01-09", "days", 7, 1.0701),
            ("2018-01-02", "years", 1, 1.4374),
            ("2017-07-03", "months", 6, 0.8401),
        ]
        for start, unit, length, expected in cases:
            ratio = pft.sharpe_ratio(
                series,
                risk_free_rate=0.06,
                from_date=start,
                to_date="2021-12-31",
                return_period_unit=unit,
                return_period_value=length,
            )
            assert round(ratio, 4) == expected

    def test_sharpe_weekly_freq(self, create_test_data):
        """Weekly data (mu=0.6, sigma=0.7), risk-free rate 0.052, ending 2021-12-31."""
        rows = create_test_data(num=261, frequency=pft.AllFrequencies.W, mu=0.6, sigma=0.7)
        series = pft.TimeSeries(rows, "W")

        # (from_date, return_period_unit, return_period_value, expected ratio)
        cases = [
            ("2017-01-08", "days", 7, 0.4533),
            ("2017-02-05", "months", 1, 0.4898),
            ("2018-01-01", "months", 12, 0.3199),
        ]
        for start, unit, length, expected in cases:
            ratio = pft.sharpe_ratio(
                series,
                risk_free_rate=0.052,
                from_date=start,
                to_date="2021-12-31",
                return_period_unit=unit,
                return_period_value=length,
            )
            assert round(ratio, 4) == expected
class TestSortino:
    """Regression tests comparing ``pft.sortino_ratio`` to values rounded to 4 places."""

    def test_sortino_daily_freq(self, create_test_data):
        """Sortino ratio on a daily series generated with mu=0.12, sigma=0.12."""
        raw = create_test_data(num=3600, frequency=pft.AllFrequencies.D, mu=0.12, sigma=0.12)
        series = pft.TimeSeries(raw, "D")

        # Each entry: (keyword arguments for sortino_ratio, expected ratio).
        # Neither case passes to_date.
        cases = [
            (
                {
                    "risk_free_rate": 0.06 / 12,
                    "from_date": "2017-02-02",
                    "return_period_unit": "months",
                    "return_period_value": 1,
                },
                1.625,
            ),
            (
                {
                    "risk_free_rate": 0.06,
                    "from_date": "2018-01-02",
                    "return_period_unit": "years",
                    "return_period_value": 1,
                },
                1.2564,
            ),
        ]
        for options, expected in cases:
            ratio = pft.sortino_ratio(series, **options)
            assert round(ratio, 4) == expected

    def test_sortino_weekly_freq(self, create_test_data):
        """Sortino ratio on a weekly series generated with mu=0.12, sigma=0.06."""
        raw = create_test_data(num=500, frequency=pft.AllFrequencies.W, mu=0.12, sigma=0.06)
        series = pft.TimeSeries(raw, "W")

        # Each entry: (keyword arguments for sortino_ratio, expected ratio).
        # The first case passes no from_date/to_date at all.
        cases = [
            (
                {
                    "risk_free_rate": 0.06,
                    "return_period_unit": "years",
                    "return_period_value": 1,
                },
                -5.5233,
            ),
            (
                {
                    "risk_free_rate": 0.052,
                    "from_date": "2017-02-05",
                    "to_date": "2021-12-31",
                    "return_period_unit": "months",
                    "return_period_value": 1,
                },
                -1.93,
            ),
            (
                {
                    "risk_free_rate": 0.052,
                    "from_date": "2018-01-01",
                    "to_date": "2021-12-31",
                    "return_period_unit": "months",
                    "return_period_value": 12,
                },
                -3.9805,
            ),
        ]
        for options, expected in cases:
            ratio = pft.sortino_ratio(series, **options)
            assert round(ratio, 4) == expected
class TestBeta:
    """Regression tests comparing ``pft.beta`` to values rounded to 4 places."""

    @staticmethod
    def _stock_and_market(create_test_data):
        """Build the (stock, market) daily series pair shared by every test here.

        The market dataset is generated before the stock dataset, matching
        the order in which the fixture is called in each test.
        """
        market_raw = create_test_data(num=3600, frequency=pft.AllFrequencies.D)
        stock_raw = create_test_data(num=3600, frequency=pft.AllFrequencies.D, mu=0.12, sigma=0.08)
        stock_ts = pft.TimeSeries(stock_raw, "D")
        market_ts = pft.TimeSeries(market_raw, "D")
        return stock_ts, market_ts

    def test_beta_daily_freq(self, create_test_data):
        """Beta with frequency "D" and an explicit one-day return period."""
        stock_ts, market_ts = self._stock_and_market(create_test_data)
        result = pft.beta(stock_ts, market_ts, frequency="D", return_period_unit="days", return_period_value=1)
        assert round(result, 4) == 1.5997

    def test_beta_daily_freq_daily_returns(self, create_test_data):
        """Beta with every optional argument left at its default."""
        stock_ts, market_ts = self._stock_and_market(create_test_data)
        result = pft.beta(stock_ts, market_ts)
        assert round(result, 4) == 1.6287

    def test_beta_monthly_freq(self, create_test_data):
        """Beta with frequency "M" and the default return period."""
        stock_ts, market_ts = self._stock_and_market(create_test_data)
        result = pft.beta(stock_ts, market_ts, frequency="M")
        assert round(result, 4) == 1.6131

    def test_beta_monthly_freq_monthly_returns(self, create_test_data):
        """Beta with frequency "M" and an explicit one-month return period."""
        stock_ts, market_ts = self._stock_and_market(create_test_data)
        result = pft.beta(stock_ts, market_ts, frequency="M", return_period_unit="months", return_period_value=1)
        assert round(result, 4) == 1.5887

26
tests/test_utils.py Normal file
View File

@ -0,0 +1,26 @@
import datetime
import pytest
from pyfacts.utils import _interval_to_years, _parse_date
class TestParseDate:
    """Tests for ``_parse_date``."""

    def test_parsing(self):
        """Each supported input form must resolve to 2020-01-01 00:00."""
        expected = datetime.datetime(2020, 1, 1)

        # Inputs parsed without an explicit format: a datetime, an ISO-style
        # string, and a plain date.
        default_format_inputs = (
            expected,
            expected.strftime("%Y-%m-%d"),
            datetime.date(2020, 1, 1),
        )
        for raw in default_format_inputs:
            assert _parse_date(raw) == expected

        # "01-01-2020" reads the same day-first or month-first, so both
        # explicit formats must yield the same datetime.
        for fmt in ("%d-%m-%Y", "%m-%d-%Y"):
            assert _parse_date("01-01-2020", date_format=fmt) == expected

    def test_errors(self):
        """Strings that do not fit the default format must raise ValueError."""
        for bad_input in ("01-01-2020", "abcdefg"):
            with pytest.raises(ValueError):
                _parse_date(bad_input)
class TestIntervalToYears:
    """Tests for ``_interval_to_years``."""

    def test_months(self):
        """Six months must convert to half a year."""
        years = _interval_to_years("months", 6)
        assert years == 0.5

View File

@ -1,6 +1,11 @@
[tox]
envlist = py39
minversion = 3.8.10
envlist = py38,py39,py310,py311,py312,py313
[testenv]
deps = pytest
commands = pytest
python-dateutil
commands = pytest tests
[flake8]
max-line-length=125