diff --git a/.ipynb_checkpoints/README-checkpoint.md b/.ipynb_checkpoints/README-checkpoint.md new file mode 100644 index 0000000..a2d8afa --- /dev/null +++ b/.ipynb_checkpoints/README-checkpoint.md @@ -0,0 +1,15 @@ +# Fincal +This module simplifies handling of time-series data + +## The problem +Time series data often have missing data points. These missing points mess things up when you are trying to do a comparison between two sections of a time series. + +To make things worse, most libraries don't allow comparison based on dates. Month-to-month and year-to-year comparisons become difficult as they cannot be translated into a number of days. However, these are commonly used metrics while looking at financial data. + +## The Solution +Fincal aims to simplify things by allowing you to: +* Compare time-series data based on dates +* Easily work around missing dates by taking the closest data points +* Complete series with missing data points using forward fill and backward fill + +## Examples \ No newline at end of file diff --git a/.ipynb_checkpoints/testing-checkpoint.ipynb b/.ipynb_checkpoints/testing-checkpoint.ipynb new file mode 100644 index 0000000..05fb9fa --- /dev/null +++ b/.ipynb_checkpoints/testing-checkpoint.ipynb @@ -0,0 +1,129 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "id": "3f7938c0-98e3-43b8-86e8-4f000cda7ce5", + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import pandas as pd\n", + "\n", + "from fincal.fincal import TimeSeries\n", + "from fincal.core import Series" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "757eafc2-f804-4e7e-a3b8-2d09cd62e646", + "metadata": {}, + "outputs": [], + "source": [ + "dfd = pd.read_csv('test_files/nav_history_daily - copy.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "59b3d4a9-8ef4-4652-9e20-1bac69ab4ff9", + "metadata": {}, + "outputs": [], + "source": [ + "dfd = dfd[dfd['amfi_code'] ==
118825].reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "4bc95ae0-8c33-4eab-acf9-e765d22979b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning: The input data contains duplicate dates which have been ignored.\n" + ] + } + ], + "source": [ + "ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "f2c3218c-3984-43d6-8638-41a74a9d0b58", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TimeSeries([(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n", + "\t (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n", + "\t (datetime.datetime(2013, 1, 4, 0, 0), 19.008)\n", + "\t ...\n", + "\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n", + "\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n", + "\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "dc469722-c816-4b57-8d91-7a3b865f86be", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "getattr(): attribute name must be string", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32m:1\u001b[0m, in \u001b[0;36m\u001b[1;34m\u001b[0m\n", + "File \u001b[1;32mD:\\Documents\\Projects\\fincal\\fincal\\fincal.py:203\u001b[0m, in \u001b[0;36mTimeSeries.calculate_rolling_returns\u001b[1;34m(self, from_date, to_date, frequency, as_on_match, prior_match, closest, compounding, years)\u001b[0m\n\u001b[0;32m 200\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n\u001b[0;32m 
201\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid argument for frequency \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfrequency\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 203\u001b[0m dates \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_date_series\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfrom_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mto_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 204\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m frequency \u001b[38;5;241m==\u001b[39m AllFrequencies\u001b[38;5;241m.\u001b[39mD:\n\u001b[0;32m 205\u001b[0m dates \u001b[38;5;241m=\u001b[39m [i \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m dates \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtime_series]\n", + "File \u001b[1;32mD:\\Documents\\Projects\\fincal\\fincal\\fincal.py:16\u001b[0m, in \u001b[0;36mcreate_date_series\u001b[1;34m(start_date, end_date, frequency, eomonth)\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate_date_series\u001b[39m(\n\u001b[0;32m 12\u001b[0m start_date: datetime\u001b[38;5;241m.\u001b[39mdatetime, end_date: datetime\u001b[38;5;241m.\u001b[39mdatetime, frequency: \u001b[38;5;28mstr\u001b[39m, eomonth: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 13\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[datetime\u001b[38;5;241m.\u001b[39mdatetime]:\n\u001b[0;32m 14\u001b[0m \u001b[38;5;124;03m\"\"\"Creates a date series using a frequency\"\"\"\u001b[39;00m\n\u001b[1;32m---> 16\u001b[0m frequency \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mAllFrequencies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m eomonth \u001b[38;5;129;01mand\u001b[39;00m frequency\u001b[38;5;241m.\u001b[39mdays \u001b[38;5;241m<\u001b[39m AllFrequencies\u001b[38;5;241m.\u001b[39mM\u001b[38;5;241m.\u001b[39mdays:\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meomonth cannot be set to True if frequency is higher than \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mAllFrequencies\u001b[38;5;241m.\u001b[39mM\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[1;31mTypeError\u001b[0m: getattr(): attribute name must be string" + ] + } + ], + "source": [ + "%%time\n", + "ts.calculate_rolling_returns(from_date='2020-01-01', to_date='2021-01-01')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..17e15f2 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} \ No newline at end of file diff --git a/fincal/core.py b/fincal/core.py index 6d7f0da..d8866a0 100644 --- a/fincal/core.py +++ b/fincal/core.py @@ -108,9 +108,9 @@ def _parse_date(date: str, date_format: str = None): try: date = datetime.datetime.strptime(date, date_format) except TypeError: - raise Exception("Date does not seem to be valid date-like string") + raise ValueError("Date does not seem to be valid date-like string") except ValueError: - raise Exception("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?") + raise ValueError("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?") return date @@ -156,6 +156,12 @@ class Series(UserList): def __repr__(self): return f"{self.__class__.__name__}({self.data})" + def __getitem__(self, i): + if isinstance(i, slice): + return self.__class__(self.data[i], self.dtype) + else: + return self.data[i] + def __gt__(self, other): if self.dtype == bool: raise TypeError("> not supported for boolean series") @@ -299,7 +305,7 @@ class TimeSeriesCore(UserDict): else: dates_to_return = [self.dates[i] for i, j in enumerate(key) if j] data_to_return = [(key, self.data[key]) for key in dates_to_return] - return TimeSeriesCore(data_to_return, frequency=self.frequency.symbol) + return self.__class__(data_to_return, frequency=self.frequency.symbol) if isinstance(key, int): raise KeyError(f"{key}. 
For index based slicing, use .iloc[{key}]") diff --git a/fincal/fincal.py b/fincal/fincal.py index d03fd26..d7b8eed 100644 --- a/fincal/fincal.py +++ b/fincal/fincal.py @@ -75,7 +75,7 @@ class TimeSeries(TimeSeriesCore): self.data = new_ts return None - return TimeSeries(new_ts, frequency=self.frequency.symbol) + return self.__class__(new_ts, frequency=self.frequency.symbol) def bfill(self, inplace: bool = False, limit: int = None) -> Union[TimeSeries, None]: """Backward fill missing dates in the time series @@ -109,7 +109,7 @@ class TimeSeries(TimeSeriesCore): self.data = new_ts return None - return TimeSeries(new_ts, frequency=self.frequency.symbol) + return self.__class__(new_ts, frequency=self.frequency.symbol) def calculate_returns( self, diff --git a/testing.ipynb b/testing.ipynb new file mode 100644 index 0000000..3edcdf8 --- /dev/null +++ b/testing.ipynb @@ -0,0 +1,192 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "3f7938c0-98e3-43b8-86e8-4f000cda7ce5", + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import pandas as pd\n", + "\n", + "from fincal.fincal import TimeSeries\n", + "from fincal.core import Series" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4b8ccd5f-dfff-4202-82c4-f66a30c122b6", + "metadata": {}, + "outputs": [], + "source": [ + "dfd = pd.read_csv('test_files/nav_history_daily - copy.csv')\n", + "\n", + "dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c52b0c2c-dd01-48dd-9ffa-3147ec9571ef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning: The input data contains duplicate dates which have been ignored.\n" + ] + }, + { + "data": { + "text/plain": [ + "TimeSeries([(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n", + "\t (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n", + "\t (datetime.datetime(2013, 1, 4, 0, 0), 19.008)\n", + 
"\t ...\n", + "\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n", + "\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n", + "\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n", + "\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9e8ff6c6-3a36-435a-ba87-5b9844c18779", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(datetime.datetime(2022, 1, 31, 0, 0), 85.18),\n", + " (datetime.datetime(2021, 5, 31, 0, 0), 74.85)]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts[['2022-01-31', '2021-05-31']]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4d927a61-0f90-4b47-89b7-0e0d3ab1b442", + "metadata": {}, + "outputs": [], + "source": [ + "s = ts.dates > '2020-01-01'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f90074f8-5173-49a9-a7d6-ceac01e92431", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TimeSeries([(datetime.datetime(2020, 1, 2, 0, 0), 58.285),\n", + "\t (datetime.datetime(2020, 1, 3, 0, 0), 58.056999999999995),\n", + "\t (datetime.datetime(2020, 1, 6, 0, 0), 56.938)\n", + "\t ...\n", + "\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n", + "\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n", + "\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts[s]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "dc469722-c816-4b57-8d91-7a3b865f86be", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: total: 15.6 ms\n", + "Wall time: 13 
ms\n" + ] + } + ], + "source": [ + "%%time\n", + "from_date = datetime.date(2020, 1, 1)\n", + "to_date = datetime.date(2021, 1, 1)\n", + "# print(ts.calculate_returns(to_date, years=7))\n", + "rr = ts.calculate_rolling_returns(from_date, to_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "086d4377-d1b1-4e51-84c0-39dee28ef75e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "list" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(rr)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}