
Separated utility functions and exceptions into standalone files

switch-to-decimal
Gourav Kumar, 2 years ago
commit 7cac5cc307
  1. fincal/core.py (116 lines changed)
  2. fincal/exceptions.py (6 lines changed)
  3. fincal/fincal.py (5 lines changed)
  4. fincal/utils.py (111 lines changed)
  5. testing.ipynb (111 lines changed)
  6. tests/test_fincal.py (4 lines changed)

fincal/core.py (116 lines changed)

@@ -2,13 +2,9 @@ import datetime
from collections import UserDict, UserList
from dataclasses import dataclass
from numbers import Number
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union
from typing import Iterable, List, Literal, Sequence
@dataclass
class FincalOptions:
date_format: str = "%Y-%m-%d"
closest: str = "before" # after
from .utils import _parse_date, _preprocess_timeseries
@dataclass(frozen=True)
@@ -29,114 +25,6 @@ class AllFrequencies:
Y = Frequency("annual", "years", 1, 365, "Y")
class DateNotFoundError(Exception):
"""Exception to be raised when date is not found"""
def __init__(self, message, date):
message = f"{message}: {date}"
super().__init__(message)
def _parse_date(date: str, date_format: str = None):
"""Parses date and handles errors"""
if isinstance(date, (datetime.datetime, datetime.date)):
return datetime.datetime.fromordinal(date.toordinal())
if date_format is None:
date_format = FincalOptions.date_format
try:
date = datetime.datetime.strptime(date, date_format)
except TypeError:
raise ValueError("Date does not seem to be valid date-like string")
except ValueError:
raise ValueError("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?")
return date
def _preprocess_timeseries(
data: Union[
Sequence[Iterable[Union[str, datetime.datetime, float]]],
Sequence[Mapping[str, Union[float, datetime.datetime]]],
Sequence[Mapping[Union[str, datetime.datetime], float]],
Mapping[Union[str, datetime.datetime], float],
],
date_format: str,
) -> List[Tuple[datetime.datetime, float]]:
"""Converts any type of list to the correct type"""
if isinstance(data, Mapping):
current_data = [(k, v) for k, v in data.items()]
return _preprocess_timeseries(current_data, date_format)
if not isinstance(data, Sequence):
raise TypeError("Could not parse the data")
if isinstance(data[0], Sequence):
return sorted([(_parse_date(i, date_format), j) for i, j in data])
if not isinstance(data[0], Mapping):
raise TypeError("Could not parse the data")
if len(data[0]) == 1:
current_data = [tuple(*i.items()) for i in data]
elif len(data[0]) == 2:
current_data = [tuple(i.values()) for i in data]
else:
raise TypeError("Could not parse the data")
return _preprocess_timeseries(current_data, date_format)
def _preprocess_match_options(as_on_match: str, prior_match: str, closest: str) -> datetime.timedelta:
"""Checks the arguments and returns appropriate timedelta objects"""
deltas = {"exact": 0, "previous": -1, "next": 1}
if closest not in deltas.keys():
raise ValueError(f"Invalid argument for closest: {closest}")
as_on_match = closest if as_on_match == "closest" else as_on_match
prior_match = closest if prior_match == "closest" else prior_match
if as_on_match in deltas.keys():
as_on_delta = datetime.timedelta(days=deltas[as_on_match])
else:
raise ValueError(f"Invalid as_on_match argument: {as_on_match}")
if prior_match in deltas.keys():
prior_delta = datetime.timedelta(days=deltas[prior_match])
else:
raise ValueError(f"Invalid prior_match argument: {prior_match}")
return as_on_delta, prior_delta
def _find_closest_date(data, date, delta, if_not_found):
"""Helper function to find data for the closest available date"""
row = data.get(date, None)
if row is not None:
return date, row
if delta:
return _find_closest_date(data, date + delta, delta, if_not_found)
if if_not_found == "fail":
raise DateNotFoundError("Data not found for date", date)
if if_not_found == "nan":
return date, float("NaN")
raise ValueError(f"Invalid argument for if_not_found: {if_not_found}")
def _interval_to_years(interval_type: Literal["years", "months", "day"], interval_value: int) -> int:
"""Converts any time period to years for use with compounding functions"""
year_conversion_factor = {"years": 1, "months": 12, "days": 365}
years = interval_value / year_conversion_factor[interval_type]
return years
class _IndexSlicer:
"""Class to create a slice using iloc in TimeSeriesCore"""

fincal/exceptions.py (6 lines changed)

@@ -0,0 +1,6 @@
class DateNotFoundError(Exception):
"""Exception to be raised when date is not found"""
def __init__(self, message, date):
message = f"{message}: {date}"
super().__init__(message)
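
For reference, a small sketch of how the relocated exception formats its message, per the __init__ shown above; the date is made up for illustration:

```python
import datetime

from fincal.exceptions import DateNotFoundError  # new home for the exception

# __init__ folds the offending date into the message as "<message>: <date>".
err = DateNotFoundError("Data not found for date", datetime.date(2022, 2, 1))
print(err)  # Data not found for date: 2022-02-01
```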

fincal/fincal.py (5 lines changed)

@@ -5,9 +5,8 @@ from typing import List, Literal, Union
from dateutil.relativedelta import relativedelta
from .core import (
AllFrequencies,
TimeSeriesCore,
from .core import AllFrequencies, TimeSeriesCore
from .utils import (
_find_closest_date,
_interval_to_years,
_parse_date,
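
The reshuffle only touches internal imports; downstream code keeps importing the public class from fincal.fincal. As a quick check, a sketch that reproduces the small monthly series exercised in testing.ipynb further down (data and expected result are taken from that notebook):

```python
from fincal.fincal import TimeSeries  # public import path is unchanged

ts = TimeSeries(
    data=[("2021-01-01", 220), ("2021-02-01", 230), ("2021-03-01", 240)],
    frequency="M",
)

# One-month simple return as of 2021-02-05: (230 - 220) / 220 ≈ 0.04545,
# matching the notebook output below.
print(ts.calculate_returns("2021-02-05", interval_type="months", interval_value=1, compounding=False))
```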

fincal/utils.py (111 lines changed)

@@ -0,0 +1,111 @@
import datetime
from dataclasses import dataclass
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union
from .exceptions import DateNotFoundError
@dataclass
class FincalOptions:
date_format: str = "%Y-%m-%d"
closest: str = "before" # after
def _parse_date(date: str, date_format: str = None):
"""Parses date and handles errors"""
if isinstance(date, (datetime.datetime, datetime.date)):
return datetime.datetime.fromordinal(date.toordinal())
if date_format is None:
date_format = FincalOptions.date_format
try:
date = datetime.datetime.strptime(date, date_format)
except TypeError:
raise ValueError("Date does not seem to be valid date-like string")
except ValueError:
raise ValueError("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?")
return date
def _preprocess_timeseries(
data: Union[
Sequence[Iterable[Union[str, datetime.datetime, float]]],
Sequence[Mapping[str, Union[float, datetime.datetime]]],
Sequence[Mapping[Union[str, datetime.datetime], float]],
Mapping[Union[str, datetime.datetime], float],
],
date_format: str,
) -> List[Tuple[datetime.datetime, float]]:
"""Converts any type of list to the correct type"""
if isinstance(data, Mapping):
current_data = [(k, v) for k, v in data.items()]
return _preprocess_timeseries(current_data, date_format)
if not isinstance(data, Sequence):
raise TypeError("Could not parse the data")
if isinstance(data[0], Sequence):
return sorted([(_parse_date(i, date_format), j) for i, j in data])
if not isinstance(data[0], Mapping):
raise TypeError("Could not parse the data")
if len(data[0]) == 1:
current_data = [tuple(*i.items()) for i in data]
elif len(data[0]) == 2:
current_data = [tuple(i.values()) for i in data]
else:
raise TypeError("Could not parse the data")
return _preprocess_timeseries(current_data, date_format)
def _preprocess_match_options(as_on_match: str, prior_match: str, closest: str) -> datetime.timedelta:
"""Checks the arguments and returns appropriate timedelta objects"""
deltas = {"exact": 0, "previous": -1, "next": 1}
if closest not in deltas.keys():
raise ValueError(f"Invalid argument for closest: {closest}")
as_on_match = closest if as_on_match == "closest" else as_on_match
prior_match = closest if prior_match == "closest" else prior_match
if as_on_match in deltas.keys():
as_on_delta = datetime.timedelta(days=deltas[as_on_match])
else:
raise ValueError(f"Invalid as_on_match argument: {as_on_match}")
if prior_match in deltas.keys():
prior_delta = datetime.timedelta(days=deltas[prior_match])
else:
raise ValueError(f"Invalid prior_match argument: {prior_match}")
return as_on_delta, prior_delta
def _find_closest_date(data, date, delta, if_not_found):
"""Helper function to find data for the closest available date"""
row = data.get(date, None)
if row is not None:
return date, row
if delta:
return _find_closest_date(data, date + delta, delta, if_not_found)
if if_not_found == "fail":
raise DateNotFoundError("Data not found for date", date)
if if_not_found == "nan":
return date, float("NaN")
raise ValueError(f"Invalid argument for if_not_found: {if_not_found}")
def _interval_to_years(interval_type: Literal["years", "months", "day"], interval_value: int) -> int:
"""Converts any time period to years for use with compounding functions"""
year_conversion_factor = {"years": 1, "months": 12, "days": 365}
years = interval_value / year_conversion_factor[interval_type]
return years
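
To illustrate what the relocated helpers do (they are private, underscore-prefixed functions, so this is only a behavioural sketch with made-up data, not a usage recommendation):

```python
import datetime

from fincal.utils import _interval_to_years, _preprocess_timeseries

as_dict = {"2022-01-31": 310.98, "2021-05-28": 249.68}
as_tuples = [("2022-01-31", 310.98), ("2021-05-28", 249.68)]

# Both input shapes normalise to the same sorted list of (datetime, value) tuples.
assert _preprocess_timeseries(as_dict, "%Y-%m-%d") == _preprocess_timeseries(as_tuples, "%Y-%m-%d")
assert _preprocess_timeseries(as_tuples, "%Y-%m-%d")[0][0] == datetime.datetime(2021, 5, 28)

# Interval conversion for the compounding helpers: 6 months is half a year.
assert _interval_to_years("months", 6) == 0.5
```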

testing.ipynb (111 lines changed)

@@ -20,11 +20,19 @@
"id": "4b8ccd5f-dfff-4202-82c4-f66a30c122b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 125 ms\n",
"Wall time: 99 ms\n"
]
},
{
"data": {
"text/plain": [
"[(datetime.datetime(2022, 1, 31, 0, 0), 310.980011),\n",
" (datetime.datetime(2021, 5, 28, 0, 0), 249.679993)]"
" (datetime.datetime(2021, 5, 28, 0, 0), 249.67999300000002)]"
]
},
"execution_count": 2,
@@ -33,6 +41,7 @@
}
],
"source": [
"%%time\n",
"dfd = pd.read_csv('test_files/msft.csv')\n",
"# dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)\n",
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n",
@@ -40,6 +49,27 @@
"ts[['2022-01-31', '2021-05-28']]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "ffd9665d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(datetime.datetime(2022, 1, 31, 0, 0), 310.980011)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts['2022-01-31']"
]
},
{
"cell_type": "code",
"execution_count": 3,
@@ -50,7 +80,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 17 ms\n"
"CPU times: total: 15.6 ms\n",
"Wall time: 16 ms\n"
]
},
{
@@ -86,7 +117,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 5.97 ms\n"
"CPU times: total: 15.6 ms\n",
"Wall time: 4 ms\n"
]
},
{
@@ -95,7 +127,7 @@
"[(datetime.datetime(1992, 2, 19, 0, 0), 2.398438),\n",
" (datetime.datetime(1992, 2, 20, 0, 0), 2.447917),\n",
" (datetime.datetime(1992, 2, 21, 0, 0), 2.385417),\n",
" (datetime.datetime(1992, 2, 24, 0, 0), 2.393229),\n",
" (datetime.datetime(1992, 2, 24, 0, 0), 2.3932290000000003),\n",
" (datetime.datetime(1992, 2, 25, 0, 0), 2.411458),\n",
" (datetime.datetime(1992, 2, 26, 0, 0), 2.541667),\n",
" (datetime.datetime(1992, 2, 27, 0, 0), 2.601563),\n",
@@ -116,7 +148,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 11,
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {
"tags": []
@@ -126,7 +158,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 311 ms\n"
"CPU times: total: 297 ms\n",
"Wall time: 290 ms\n"
]
}
],
@@ -186,6 +219,70 @@
"sr = Series([1, 2, 3, 4, 5], 'number')\n",
"sr"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "adceda69",
"metadata": {},
"outputs": [],
"source": [
"from fincal.fincal import TimeSeries\n",
"import datetime\n",
"ts = TimeSeries(data = [('2021-01-01', 220), ('2021-02-01', 230), ('2021-03-01', 240)], frequency='M')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "68cf9f8c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(datetime.datetime(2021, 2, 1, 0, 0), 0.045454545454545414)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts.calculate_returns('2021-02-05', interval_type='months', interval_value=1, compounding=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a583347f",
"metadata": {},
"outputs": [],
"source": [
"D = {'a': 1, 'b': 2}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f79ac787",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['a', 'b'])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"D.keys()"
]
}
],
"metadata": {
@@ -204,7 +301,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
"version": "3.8.3"
}
},
"nbformat": 4,

tests/test_fincal.py (4 lines changed)

@@ -4,8 +4,10 @@ import random
from typing import Literal, Sequence
import pytest
from fincal.core import DateNotFoundError, FincalOptions, Frequency, Series
from fincal.core import Frequency, Series
from fincal.exceptions import DateNotFoundError
from fincal.fincal import TimeSeries, create_date_series
from fincal.utils import FincalOptions
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sample_data_path = os.path.join(THIS_DIR, "data")
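
A hypothetical test along these lines could pin down the new module layout; the data and test name below are illustrative and not part of this commit:

```python
import datetime

import pytest

from fincal.exceptions import DateNotFoundError
from fincal.utils import _find_closest_date


def test_find_closest_date_raises_for_missing_date():
    data = {datetime.datetime(2022, 1, 31): 310.98}  # illustrative data
    missing = datetime.datetime(2022, 2, 1)
    # With a zero delta and if_not_found="fail", the helper raises the
    # exception from its new module, fincal.exceptions.
    with pytest.raises(DateNotFoundError):
        _find_closest_date(data, missing, datetime.timedelta(0), "fail")
```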
