Merge branch 'UserDict-trial' of http://192.168.0.114:3000/buddy/fincal into UserDict-trial

This commit is contained in:
Gourav Kumar 2022-02-22 09:35:23 +05:30
commit 5c5d5b8cc6
6 changed files with 362 additions and 5 deletions

View File

@ -0,0 +1,15 @@
# Fincal
This module simplified handling of time-series data
## The problem
Time series data often have missing data points. These missing points mess things up when you are trying to do a comparison between two sections of a time series.
To make things worse, most libraries don't allow comparison based on dates. Month to Month and year to year comparisons become difficult as they cannot be translated into number of days. However, these are commonly used metrics while looking at financial data.
## The Solution
Fincal aims to simplify things by allowing you to:
* Compare time-series data based on dates
* Easy way to work around missing dates by taking the closest data points
* Completing series with missing data points using forward fill and backward fill
## Examples

View File

@ -0,0 +1,129 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "3f7938c0-98e3-43b8-86e8-4f000cda7ce5",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"import pandas as pd\n",
"\n",
"from fincal.fincal import TimeSeries\n",
"from fincal.core import Series"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "757eafc2-f804-4e7e-a3b8-2d09cd62e646",
"metadata": {},
"outputs": [],
"source": [
"dfd = pd.read_csv('test_files/nav_history_daily - copy.csv')"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "59b3d4a9-8ef4-4652-9e20-1bac69ab4ff9",
"metadata": {},
"outputs": [],
"source": [
"dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "4bc95ae0-8c33-4eab-acf9-e765d22979b8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: The input data contains duplicate dates which have been ignored.\n"
]
}
],
"source": [
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "f2c3218c-3984-43d6-8638-41a74a9d0b58",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n",
"\t (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n",
"\t (datetime.datetime(2013, 1, 4, 0, 0), 19.008)\n",
"\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "getattr(): attribute name must be string",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m<timed eval>:1\u001b[0m, in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n",
"File \u001b[1;32mD:\\Documents\\Projects\\fincal\\fincal\\fincal.py:203\u001b[0m, in \u001b[0;36mTimeSeries.calculate_rolling_returns\u001b[1;34m(self, from_date, to_date, frequency, as_on_match, prior_match, closest, compounding, years)\u001b[0m\n\u001b[0;32m 200\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n\u001b[0;32m 201\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid argument for frequency \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfrequency\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 203\u001b[0m dates \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_date_series\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfrom_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mto_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 204\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m frequency \u001b[38;5;241m==\u001b[39m AllFrequencies\u001b[38;5;241m.\u001b[39mD:\n\u001b[0;32m 205\u001b[0m dates \u001b[38;5;241m=\u001b[39m [i \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m dates \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtime_series]\n",
"File \u001b[1;32mD:\\Documents\\Projects\\fincal\\fincal\\fincal.py:16\u001b[0m, in \u001b[0;36mcreate_date_series\u001b[1;34m(start_date, end_date, frequency, eomonth)\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate_date_series\u001b[39m(\n\u001b[0;32m 12\u001b[0m start_date: datetime\u001b[38;5;241m.\u001b[39mdatetime, end_date: datetime\u001b[38;5;241m.\u001b[39mdatetime, frequency: \u001b[38;5;28mstr\u001b[39m, eomonth: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 13\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[datetime\u001b[38;5;241m.\u001b[39mdatetime]:\n\u001b[0;32m 14\u001b[0m \u001b[38;5;124;03m\"\"\"Creates a date series using a frequency\"\"\"\u001b[39;00m\n\u001b[1;32m---> 16\u001b[0m frequency \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mAllFrequencies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m eomonth \u001b[38;5;129;01mand\u001b[39;00m frequency\u001b[38;5;241m.\u001b[39mdays \u001b[38;5;241m<\u001b[39m AllFrequencies\u001b[38;5;241m.\u001b[39mM\u001b[38;5;241m.\u001b[39mdays:\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meomonth cannot be set to True if frequency is higher than \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mAllFrequencies\u001b[38;5;241m.\u001b[39mM\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[1;31mTypeError\u001b[0m: getattr(): attribute name must be string"
]
}
],
"source": [
"%%time\n",
"ts.calculate_rolling_returns(from_date='2020-01-01', to_date='2021-01-01')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

15
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}

View File

@ -108,9 +108,9 @@ def _parse_date(date: str, date_format: str = None):
try: try:
date = datetime.datetime.strptime(date, date_format) date = datetime.datetime.strptime(date, date_format)
except TypeError: except TypeError:
raise Exception("Date does not seem to be valid date-like string") raise ValueError("Date does not seem to be valid date-like string")
except ValueError: except ValueError:
raise Exception("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?") raise ValueError("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?")
return date return date
@ -156,6 +156,12 @@ class Series(UserList):
def __repr__(self): def __repr__(self):
return f"{self.__class__.__name__}({self.data})" return f"{self.__class__.__name__}({self.data})"
def __getitem__(self, i):
if isinstance(i, slice):
return self.__class__(self.data[i], self.dtype)
else:
return self.data[i]
def __gt__(self, other): def __gt__(self, other):
if self.dtype == bool: if self.dtype == bool:
raise TypeError("> not supported for boolean series") raise TypeError("> not supported for boolean series")
@ -299,7 +305,7 @@ class TimeSeriesCore(UserDict):
else: else:
dates_to_return = [self.dates[i] for i, j in enumerate(key) if j] dates_to_return = [self.dates[i] for i, j in enumerate(key) if j]
data_to_return = [(key, self.data[key]) for key in dates_to_return] data_to_return = [(key, self.data[key]) for key in dates_to_return]
return TimeSeriesCore(data_to_return, frequency=self.frequency.symbol) return self.__class__(data_to_return, frequency=self.frequency.symbol)
if isinstance(key, int): if isinstance(key, int):
raise KeyError(f"{key}. For index based slicing, use .iloc[{key}]") raise KeyError(f"{key}. For index based slicing, use .iloc[{key}]")

View File

@ -75,7 +75,7 @@ class TimeSeries(TimeSeriesCore):
self.data = new_ts self.data = new_ts
return None return None
return TimeSeries(new_ts, frequency=self.frequency.symbol) return self.__class__(new_ts, frequency=self.frequency.symbol)
def bfill(self, inplace: bool = False, limit: int = None) -> Union[TimeSeries, None]: def bfill(self, inplace: bool = False, limit: int = None) -> Union[TimeSeries, None]:
"""Backward fill missing dates in the time series """Backward fill missing dates in the time series
@ -109,7 +109,7 @@ class TimeSeries(TimeSeriesCore):
self.data = new_ts self.data = new_ts
return None return None
return TimeSeries(new_ts, frequency=self.frequency.symbol) return self.__class__(new_ts, frequency=self.frequency.symbol)
def calculate_returns( def calculate_returns(
self, self,

192
testing.ipynb Normal file
View File

@ -0,0 +1,192 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3f7938c0-98e3-43b8-86e8-4f000cda7ce5",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"import pandas as pd\n",
"\n",
"from fincal.fincal import TimeSeries\n",
"from fincal.core import Series"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4b8ccd5f-dfff-4202-82c4-f66a30c122b6",
"metadata": {},
"outputs": [],
"source": [
"dfd = pd.read_csv('test_files/nav_history_daily - copy.csv')\n",
"\n",
"dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c52b0c2c-dd01-48dd-9ffa-3147ec9571ef",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: The input data contains duplicate dates which have been ignored.\n"
]
},
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n",
"\t (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n",
"\t (datetime.datetime(2013, 1, 4, 0, 0), 19.008)\n",
"\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n",
"\n",
"ts"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9e8ff6c6-3a36-435a-ba87-5b9844c18779",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(datetime.datetime(2022, 1, 31, 0, 0), 85.18),\n",
" (datetime.datetime(2021, 5, 31, 0, 0), 74.85)]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts[['2022-01-31', '2021-05-31']]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4d927a61-0f90-4b47-89b7-0e0d3ab1b442",
"metadata": {},
"outputs": [],
"source": [
"s = ts.dates > '2020-01-01'"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f90074f8-5173-49a9-a7d6-ceac01e92431",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2020, 1, 2, 0, 0), 58.285),\n",
"\t (datetime.datetime(2020, 1, 3, 0, 0), 58.056999999999995),\n",
"\t (datetime.datetime(2020, 1, 6, 0, 0), 56.938)\n",
"\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts[s]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 15.6 ms\n",
"Wall time: 13 ms\n"
]
}
],
"source": [
"%%time\n",
"from_date = datetime.date(2020, 1, 1)\n",
"to_date = datetime.date(2021, 1, 1)\n",
"# print(ts.calculate_returns(to_date, years=7))\n",
"rr = ts.calculate_rolling_returns(from_date, to_date)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "086d4377-d1b1-4e51-84c0-39dee28ef75e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"list"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(rr)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}