diff --git a/fincal/core.py b/fincal/core.py index 3b1db0a..ecc30b4 100644 --- a/fincal/core.py +++ b/fincal/core.py @@ -2,13 +2,9 @@ import datetime from collections import UserDict, UserList from dataclasses import dataclass from numbers import Number -from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union +from typing import Iterable, List, Literal, Sequence - -@dataclass -class FincalOptions: - date_format: str = "%Y-%m-%d" - closest: str = "before" # after +from .utils import _parse_date, _preprocess_timeseries @dataclass(frozen=True) @@ -29,114 +25,6 @@ class AllFrequencies: Y = Frequency("annual", "years", 1, 365, "Y") -class DateNotFoundError(Exception): - """Exception to be raised when date is not found""" - - def __init__(self, message, date): - message = f"{message}: {date}" - super().__init__(message) - - -def _parse_date(date: str, date_format: str = None): - """Parses date and handles errors""" - - if isinstance(date, (datetime.datetime, datetime.date)): - return datetime.datetime.fromordinal(date.toordinal()) - - if date_format is None: - date_format = FincalOptions.date_format - - try: - date = datetime.datetime.strptime(date, date_format) - except TypeError: - raise ValueError("Date does not seem to be valid date-like string") - except ValueError: - raise ValueError("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?") - return date - - -def _preprocess_timeseries( - data: Union[ - Sequence[Iterable[Union[str, datetime.datetime, float]]], - Sequence[Mapping[str, Union[float, datetime.datetime]]], - Sequence[Mapping[Union[str, datetime.datetime], float]], - Mapping[Union[str, datetime.datetime], float], - ], - date_format: str, -) -> List[Tuple[datetime.datetime, float]]: - """Converts any type of list to the correct type""" - - if isinstance(data, Mapping): - current_data = [(k, v) for k, v in data.items()] - return _preprocess_timeseries(current_data, date_format) - - if not isinstance(data, Sequence): - raise TypeError("Could not parse the data") - - if isinstance(data[0], Sequence): - return sorted([(_parse_date(i, date_format), j) for i, j in data]) - - if not isinstance(data[0], Mapping): - raise TypeError("Could not parse the data") - - if len(data[0]) == 1: - current_data = [tuple(*i.items()) for i in data] - elif len(data[0]) == 2: - current_data = [tuple(i.values()) for i in data] - else: - raise TypeError("Could not parse the data") - return _preprocess_timeseries(current_data, date_format) - - -def _preprocess_match_options(as_on_match: str, prior_match: str, closest: str) -> datetime.timedelta: - """Checks the arguments and returns appropriate timedelta objects""" - - deltas = {"exact": 0, "previous": -1, "next": 1} - if closest not in deltas.keys(): - raise ValueError(f"Invalid argument for closest: {closest}") - - as_on_match = closest if as_on_match == "closest" else as_on_match - prior_match = closest if prior_match == "closest" else prior_match - - if as_on_match in deltas.keys(): - as_on_delta = datetime.timedelta(days=deltas[as_on_match]) - else: - raise ValueError(f"Invalid as_on_match argument: {as_on_match}") - - if prior_match in deltas.keys(): - prior_delta = datetime.timedelta(days=deltas[prior_match]) - else: - raise ValueError(f"Invalid prior_match argument: {prior_match}") - - return as_on_delta, prior_delta - - -def _find_closest_date(data, date, delta, if_not_found): - """Helper function to find data for the closest available date""" - - row = data.get(date, None) - if row is not None: - return date, row - - if delta: - return _find_closest_date(data, date + delta, delta, if_not_found) - - if if_not_found == "fail": - raise DateNotFoundError("Data not found for date", date) - if if_not_found == "nan": - return date, float("NaN") - - raise ValueError(f"Invalid argument for if_not_found: {if_not_found}") - - -def _interval_to_years(interval_type: Literal["years", "months", "day"], interval_value: int) -> int: - """Converts any time period to years for use with compounding functions""" - - year_conversion_factor = {"years": 1, "months": 12, "days": 365} - years = interval_value / year_conversion_factor[interval_type] - return years - - class _IndexSlicer: """Class to create a slice using iloc in TimeSeriesCore""" diff --git a/fincal/exceptions.py b/fincal/exceptions.py new file mode 100644 index 0000000..2561952 --- /dev/null +++ b/fincal/exceptions.py @@ -0,0 +1,6 @@ +class DateNotFoundError(Exception): + """Exception to be raised when date is not found""" + + def __init__(self, message, date): + message = f"{message}: {date}" + super().__init__(message) diff --git a/fincal/fincal.py b/fincal/fincal.py index 5486d1b..036a3cd 100644 --- a/fincal/fincal.py +++ b/fincal/fincal.py @@ -5,9 +5,8 @@ from typing import List, Literal, Union from dateutil.relativedelta import relativedelta -from .core import ( - AllFrequencies, - TimeSeriesCore, +from .core import AllFrequencies, TimeSeriesCore +from .utils import ( _find_closest_date, _interval_to_years, _parse_date, diff --git a/fincal/utils.py b/fincal/utils.py new file mode 100644 index 0000000..097199c --- /dev/null +++ b/fincal/utils.py @@ -0,0 +1,111 @@ +import datetime +from dataclasses import dataclass +from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union + +from .exceptions import DateNotFoundError + + +@dataclass +class FincalOptions: + date_format: str = "%Y-%m-%d" + closest: str = "before" # after + + +def _parse_date(date: str, date_format: str = None): + """Parses date and handles errors""" + + if isinstance(date, (datetime.datetime, datetime.date)): + return datetime.datetime.fromordinal(date.toordinal()) + + if date_format is None: + date_format = FincalOptions.date_format + + try: + date = datetime.datetime.strptime(date, date_format) + except TypeError: + raise ValueError("Date does not seem to be valid date-like string") + except ValueError: + raise ValueError("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?") + return date + + +def _preprocess_timeseries( + data: Union[ + Sequence[Iterable[Union[str, datetime.datetime, float]]], + Sequence[Mapping[str, Union[float, datetime.datetime]]], + Sequence[Mapping[Union[str, datetime.datetime], float]], + Mapping[Union[str, datetime.datetime], float], + ], + date_format: str, +) -> List[Tuple[datetime.datetime, float]]: + """Converts any type of list to the correct type""" + + if isinstance(data, Mapping): + current_data = [(k, v) for k, v in data.items()] + return _preprocess_timeseries(current_data, date_format) + + if not isinstance(data, Sequence): + raise TypeError("Could not parse the data") + + if isinstance(data[0], Sequence): + return sorted([(_parse_date(i, date_format), j) for i, j in data]) + + if not isinstance(data[0], Mapping): + raise TypeError("Could not parse the data") + + if len(data[0]) == 1: + current_data = [tuple(*i.items()) for i in data] + elif len(data[0]) == 2: + current_data = [tuple(i.values()) for i in data] + else: + raise TypeError("Could not parse the data") + return _preprocess_timeseries(current_data, date_format) + + +def _preprocess_match_options(as_on_match: str, prior_match: str, closest: str) -> datetime.timedelta: + """Checks the arguments and returns appropriate timedelta objects""" + + deltas = {"exact": 0, "previous": -1, "next": 1} + if closest not in deltas.keys(): + raise ValueError(f"Invalid argument for closest: {closest}") + + as_on_match = closest if as_on_match == "closest" else as_on_match + prior_match = closest if prior_match == "closest" else prior_match + + if as_on_match in deltas.keys(): + as_on_delta = datetime.timedelta(days=deltas[as_on_match]) + else: + raise ValueError(f"Invalid as_on_match argument: {as_on_match}") + + if prior_match in deltas.keys(): + prior_delta = datetime.timedelta(days=deltas[prior_match]) + else: + raise ValueError(f"Invalid prior_match argument: {prior_match}") + + return as_on_delta, prior_delta + + +def _find_closest_date(data, date, delta, if_not_found): + """Helper function to find data for the closest available date""" + + row = data.get(date, None) + if row is not None: + return date, row + + if delta: + return _find_closest_date(data, date + delta, delta, if_not_found) + + if if_not_found == "fail": + raise DateNotFoundError("Data not found for date", date) + if if_not_found == "nan": + return date, float("NaN") + + raise ValueError(f"Invalid argument for if_not_found: {if_not_found}") + + +def _interval_to_years(interval_type: Literal["years", "months", "day"], interval_value: int) -> int: + """Converts any time period to years for use with compounding functions""" + + year_conversion_factor = {"years": 1, "months": 12, "days": 365} + years = interval_value / year_conversion_factor[interval_type] + return years diff --git a/testing.ipynb b/testing.ipynb index 51311f3..c036e9b 100644 --- a/testing.ipynb +++ b/testing.ipynb @@ -20,11 +20,19 @@ "id": "4b8ccd5f-dfff-4202-82c4-f66a30c122b6", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: total: 125 ms\n", + "Wall time: 99 ms\n" + ] + }, { "data": { "text/plain": [ "[(datetime.datetime(2022, 1, 31, 0, 0), 310.980011),\n", - " (datetime.datetime(2021, 5, 28, 0, 0), 249.679993)]" + " (datetime.datetime(2021, 5, 28, 0, 0), 249.67999300000002)]" ] }, "execution_count": 2, @@ -33,6 +41,7 @@ } ], "source": [ + "%%time\n", "dfd = pd.read_csv('test_files/msft.csv')\n", "# dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)\n", "ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n", @@ -40,6 +49,27 @@ "ts[['2022-01-31', '2021-05-28']]" ] }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ffd9665d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(datetime.datetime(2022, 1, 31, 0, 0), 310.980011)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts['2022-01-31']" + ] + }, { "cell_type": "code", "execution_count": 3, @@ -50,7 +80,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Wall time: 17 ms\n" + "CPU times: total: 15.6 ms\n", + "Wall time: 16 ms\n" ] }, { @@ -86,7 +117,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Wall time: 5.97 ms\n" + "CPU times: total: 15.6 ms\n", + "Wall time: 4 ms\n" ] }, { @@ -95,7 +127,7 @@ "[(datetime.datetime(1992, 2, 19, 0, 0), 2.398438),\n", " (datetime.datetime(1992, 2, 20, 0, 0), 2.447917),\n", " (datetime.datetime(1992, 2, 21, 0, 0), 2.385417),\n", - " (datetime.datetime(1992, 2, 24, 0, 0), 2.393229),\n", + " (datetime.datetime(1992, 2, 24, 0, 0), 2.3932290000000003),\n", " (datetime.datetime(1992, 2, 25, 0, 0), 2.411458),\n", " (datetime.datetime(1992, 2, 26, 0, 0), 2.541667),\n", " (datetime.datetime(1992, 2, 27, 0, 0), 2.601563),\n", @@ -116,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "id": "dc469722-c816-4b57-8d91-7a3b865f86be", "metadata": { "tags": [] @@ -126,7 +158,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Wall time: 311 ms\n" + "CPU times: total: 297 ms\n", + "Wall time: 290 ms\n" ] } ], @@ -186,6 +219,70 @@ "sr = Series([1, 2, 3, 4, 5], 'number')\n", "sr" ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "adceda69", + "metadata": {}, + "outputs": [], + "source": [ + "from fincal.fincal import TimeSeries\n", + "import datetime\n", + "ts = TimeSeries(data = [('2021-01-01', 220), ('2021-02-01', 230), ('2021-03-01', 240)], frequency='M')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "68cf9f8c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(datetime.datetime(2021, 2, 1, 0, 0), 0.045454545454545414)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts.calculate_returns('2021-02-05', interval_type='months', interval_value=1, compounding=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a583347f", + "metadata": {}, + "outputs": [], + "source": [ + "D = {'a': 1, 'b': 2}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f79ac787", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['a', 'b'])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "D.keys()" + ] } ], "metadata": { @@ -204,7 +301,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.2" + "version": "3.8.3" } }, "nbformat": 4, diff --git a/tests/test_fincal.py b/tests/test_fincal.py index 9dc4120..c2a6f37 100644 --- a/tests/test_fincal.py +++ b/tests/test_fincal.py @@ -4,8 +4,10 @@ import random from typing import Literal, Sequence import pytest -from fincal.core import DateNotFoundError, FincalOptions, Frequency, Series +from fincal.core import Frequency, Series +from fincal.exceptions import DateNotFoundError from fincal.fincal import TimeSeries, create_date_series +from fincal.utils import FincalOptions THIS_DIR = os.path.dirname(os.path.abspath(__file__)) sample_data_path = os.path.join(THIS_DIR, "data")