
Separated utility functions and exceptions into standalone files

switch-to-decimal
Gourav Kumar, 2 years ago
commit 7cac5cc307
  1. fincal/core.py (116 lines changed)
  2. fincal/exceptions.py (6 lines changed)
  3. fincal/fincal.py (5 lines changed)
  4. fincal/utils.py (111 lines changed)
  5. testing.ipynb (111 lines changed)
  6. tests/test_fincal.py (4 lines changed)

fincal/core.py (116 lines changed)

@@ -2,13 +2,9 @@ import datetime
from collections import UserDict, UserList
from dataclasses import dataclass
from numbers import Number
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union
from typing import Iterable, List, Literal, Sequence
@dataclass
class FincalOptions:
date_format: str = "%Y-%m-%d"
closest: str = "before" # after
from .utils import _parse_date, _preprocess_timeseries
@dataclass(frozen=True)
@@ -29,114 +25,6 @@ class AllFrequencies:
Y = Frequency("annual", "years", 1, 365, "Y")
class DateNotFoundError(Exception):
"""Exception to be raised when date is not found"""
def __init__(self, message, date):
message = f"{message}: {date}"
super().__init__(message)
def _parse_date(date: str, date_format: str = None):
"""Parses date and handles errors"""
if isinstance(date, (datetime.datetime, datetime.date)):
return datetime.datetime.fromordinal(date.toordinal())
if date_format is None:
date_format = FincalOptions.date_format
try:
date = datetime.datetime.strptime(date, date_format)
except TypeError:
raise ValueError("Date does not seem to be valid date-like string")
except ValueError:
raise ValueError("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?")
return date
def _preprocess_timeseries(
data: Union[
Sequence[Iterable[Union[str, datetime.datetime, float]]],
Sequence[Mapping[str, Union[float, datetime.datetime]]],
Sequence[Mapping[Union[str, datetime.datetime], float]],
Mapping[Union[str, datetime.datetime], float],
],
date_format: str,
) -> List[Tuple[datetime.datetime, float]]:
"""Converts any type of list to the correct type"""
if isinstance(data, Mapping):
current_data = [(k, v) for k, v in data.items()]
return _preprocess_timeseries(current_data, date_format)
if not isinstance(data, Sequence):
raise TypeError("Could not parse the data")
if isinstance(data[0], Sequence):
return sorted([(_parse_date(i, date_format), j) for i, j in data])
if not isinstance(data[0], Mapping):
raise TypeError("Could not parse the data")
if len(data[0]) == 1:
current_data = [tuple(*i.items()) for i in data]
elif len(data[0]) == 2:
current_data = [tuple(i.values()) for i in data]
else:
raise TypeError("Could not parse the data")
return _preprocess_timeseries(current_data, date_format)
def _preprocess_match_options(as_on_match: str, prior_match: str, closest: str) -> datetime.timedelta:
"""Checks the arguments and returns appropriate timedelta objects"""
deltas = {"exact": 0, "previous": -1, "next": 1}
if closest not in deltas.keys():
raise ValueError(f"Invalid argument for closest: {closest}")
as_on_match = closest if as_on_match == "closest" else as_on_match
prior_match = closest if prior_match == "closest" else prior_match
if as_on_match in deltas.keys():
as_on_delta = datetime.timedelta(days=deltas[as_on_match])
else:
raise ValueError(f"Invalid as_on_match argument: {as_on_match}")
if prior_match in deltas.keys():
prior_delta = datetime.timedelta(days=deltas[prior_match])
else:
raise ValueError(f"Invalid prior_match argument: {prior_match}")
return as_on_delta, prior_delta
def _find_closest_date(data, date, delta, if_not_found):
"""Helper function to find data for the closest available date"""
row = data.get(date, None)
if row is not None:
return date, row
if delta:
return _find_closest_date(data, date + delta, delta, if_not_found)
if if_not_found == "fail":
raise DateNotFoundError("Data not found for date", date)
if if_not_found == "nan":
return date, float("NaN")
raise ValueError(f"Invalid argument for if_not_found: {if_not_found}")
def _interval_to_years(interval_type: Literal["years", "months", "day"], interval_value: int) -> int:
"""Converts any time period to years for use with compounding functions"""
year_conversion_factor = {"years": 1, "months": 12, "days": 365}
years = interval_value / year_conversion_factor[interval_type]
return years
class _IndexSlicer:
"""Class to create a slice using iloc in TimeSeriesCore"""

fincal/exceptions.py (6 lines changed)

@@ -0,0 +1,6 @@
class DateNotFoundError(Exception):
"""Exception to be raised when date is not found"""
def __init__(self, message, date):
message = f"{message}: {date}"
super().__init__(message)
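
For reference, a small sketch of how the relocated exception formats its message, per the __init__ shown above; the date is made up for illustration:

```python
import datetime

from fincal.exceptions import DateNotFoundError  # new home for the exception

# __init__ folds the offending date into the message as "<message>: <date>".
err = DateNotFoundError("Data not found for date", datetime.date(2022, 2, 1))
print(err)  # Data not found for date: 2022-02-01
```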

fincal/fincal.py (5 lines changed)

@@ -5,9 +5,8 @@ from typing import List, Literal, Union
from dateutil.relativedelta import relativedelta
from .core import (
AllFrequencies,
TimeSeriesCore,
from .core import AllFrequencies, TimeSeriesCore
from .utils import (
_find_closest_date,
_interval_to_years,
_parse_date,
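
The reshuffle only touches internal imports; downstream code keeps importing the public class from fincal.fincal. As a quick check, a sketch that reproduces the small monthly series exercised in testing.ipynb further down (data and expected result are taken from that notebook):

```python
from fincal.fincal import TimeSeries  # public import path is unchanged

ts = TimeSeries(
    data=[("2021-01-01", 220), ("2021-02-01", 230), ("2021-03-01", 240)],
    frequency="M",
)

# One-month simple return as of 2021-02-05: (230 - 220) / 220 ≈ 0.04545,
# matching the notebook output below.
print(ts.calculate_returns("2021-02-05", interval_type="months", interval_value=1, compounding=False))
```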

fincal/utils.py (111 lines changed)

@@ -0,0 +1,111 @@
import datetime
from dataclasses import dataclass
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union
from .exceptions import DateNotFoundError
@dataclass
class FincalOptions:
date_format: str = "%Y-%m-%d"
closest: str = "before" # after
def _parse_date(date: str, date_format: str = None):
"""Parses date and handles errors"""
if isinstance(date, (datetime.datetime, datetime.date)):
return datetime.datetime.fromordinal(date.toordinal())
if date_format is None:
date_format = FincalOptions.date_format
try:
date = datetime.datetime.strptime(date, date_format)
except TypeError:
raise ValueError("Date does not seem to be valid date-like string")
except ValueError:
raise ValueError("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?")
return date
def _preprocess_timeseries(
data: Union[
Sequence[Iterable[Union[str, datetime.datetime, float]]],
Sequence[Mapping[str, Union[float, datetime.datetime]]],
Sequence[Mapping[Union[str, datetime.datetime], float]],
Mapping[Union[str, datetime.datetime], float],
],
date_format: str,
) -> List[Tuple[datetime.datetime, float]]:
"""Converts any type of list to the correct type"""
if isinstance(data, Mapping):
current_data = [(k, v) for k, v in data.items()]
return _preprocess_timeseries(current_data, date_format)
if not isinstance(data, Sequence):
raise TypeError("Could not parse the data")
if isinstance(data[0], Sequence):
return sorted([(_parse_date(i, date_format), j) for i, j in data])
if not isinstance(data[0], Mapping):
raise TypeError("Could not parse the data")
if len(data[0]) == 1:
current_data = [tuple(*i.items()) for i in data]
elif len(data[0]) == 2:
current_data = [tuple(i.values()) for i in data]
else:
raise TypeError("Could not parse the data")
return _preprocess_timeseries(current_data, date_format)
def _preprocess_match_options(as_on_match: str, prior_match: str, closest: str) -> datetime.timedelta:
"""Checks the arguments and returns appropriate timedelta objects"""
deltas = {"exact": 0, "previous": -1, "next": 1}
if closest not in deltas.keys():
raise ValueError(f"Invalid argument for closest: {closest}")
as_on_match = closest if as_on_match == "closest" else as_on_match
prior_match = closest if prior_match == "closest" else prior_match
if as_on_match in deltas.keys():
as_on_delta = datetime.timedelta(days=deltas[as_on_match])
else:
raise ValueError(f"Invalid as_on_match argument: {as_on_match}")
if prior_match in deltas.keys():
prior_delta = datetime.timedelta(days=deltas[prior_match])
else:
raise ValueError(f"Invalid prior_match argument: {prior_match}")
return as_on_delta, prior_delta
def _find_closest_date(data, date, delta, if_not_found):
"""Helper function to find data for the closest available date"""
row = data.get(date, None)
if row is not None:
return date, row
if delta:
return _find_closest_date(data, date + delta, delta, if_not_found)
if if_not_found == "fail":
raise DateNotFoundError("Data not found for date", date)
if if_not_found == "nan":
return date, float("NaN")
raise ValueError(f"Invalid argument for if_not_found: {if_not_found}")
def _interval_to_years(interval_type: Literal["years", "months", "day"], interval_value: int) -> int:
"""Converts any time period to years for use with compounding functions"""
year_conversion_factor = {"years": 1, "months": 12, "days": 365}
years = interval_value / year_conversion_factor[interval_type]
return years
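
To illustrate what the relocated helpers do (they are private, underscore-prefixed functions, so this is only a behavioural sketch with made-up data, not a usage recommendation):

```python
import datetime

from fincal.utils import _interval_to_years, _preprocess_timeseries

as_dict = {"2022-01-31": 310.98, "2021-05-28": 249.68}
as_tuples = [("2022-01-31", 310.98), ("2021-05-28", 249.68)]

# Both input shapes normalise to the same sorted list of (datetime, value) tuples.
assert _preprocess_timeseries(as_dict, "%Y-%m-%d") == _preprocess_timeseries(as_tuples, "%Y-%m-%d")
assert _preprocess_timeseries(as_tuples, "%Y-%m-%d")[0][0] == datetime.datetime(2021, 5, 28)

# Interval conversion for the compounding helpers: 6 months is half a year.
assert _interval_to_years("months", 6) == 0.5
```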

testing.ipynb (111 lines changed)

@@ -20,11 +20,19 @@
"id": "4b8ccd5f-dfff-4202-82c4-f66a30c122b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 125 ms\n",
"Wall time: 99 ms\n"
]
},
{
"data": {
"text/plain": [
"[(datetime.datetime(2022, 1, 31, 0, 0), 310.980011),\n",
" (datetime.datetime(2021, 5, 28, 0, 0), 249.679993)]"
" (datetime.datetime(2021, 5, 28, 0, 0), 249.67999300000002)]"
]
},
"execution_count": 2,
@@ -33,6 +41,7 @@
}
],
"source": [
"%%time\n",
"dfd = pd.read_csv('test_files/msft.csv')\n",
"# dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)\n",
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n",
@@ -40,6 +49,27 @@
"ts[['2022-01-31', '2021-05-28']]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "ffd9665d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(datetime.datetime(2022, 1, 31, 0, 0), 310.980011)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts['2022-01-31']"
]
},
{
"cell_type": "code",
"execution_count": 3,
@@ -50,7 +80,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 17 ms\n"
"CPU times: total: 15.6 ms\n",
"Wall time: 16 ms\n"
]
},
{
@@ -86,7 +117,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 5.97 ms\n"
"CPU times: total: 15.6 ms\n",
"Wall time: 4 ms\n"
]
},
{
@@ -95,7 +127,7 @@
"[(datetime.datetime(1992, 2, 19, 0, 0), 2.398438),\n",
" (datetime.datetime(1992, 2, 20, 0, 0), 2.447917),\n",
" (datetime.datetime(1992, 2, 21, 0, 0), 2.385417),\n",
" (datetime.datetime(1992, 2, 24, 0, 0), 2.393229),\n",
" (datetime.datetime(1992, 2, 24, 0, 0), 2.3932290000000003),\n",
" (datetime.datetime(1992, 2, 25, 0, 0), 2.411458),\n",
" (datetime.datetime(1992, 2, 26, 0, 0), 2.541667),\n",
" (datetime.datetime(1992, 2, 27, 0, 0), 2.601563),\n",
@@ -116,7 +148,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 11,
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {
"tags": []
@@ -126,7 +158,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 311 ms\n"
"CPU times: total: 297 ms\n",
"Wall time: 290 ms\n"
]
}
],
@@ -186,6 +219,70 @@
"sr = Series([1, 2, 3, 4, 5], 'number')\n",
"sr"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "adceda69",
"metadata": {},
"outputs": [],
"source": [
"from fincal.fincal import TimeSeries\n",
"import datetime\n",
"ts = TimeSeries(data = [('2021-01-01', 220), ('2021-02-01', 230), ('2021-03-01', 240)], frequency='M')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "68cf9f8c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(datetime.datetime(2021, 2, 1, 0, 0), 0.045454545454545414)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts.calculate_returns('2021-02-05', interval_type='months', interval_value=1, compounding=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a583347f",
"metadata": {},
"outputs": [],
"source": [
"D = {'a': 1, 'b': 2}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f79ac787",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['a', 'b'])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"D.keys()"
]
}
],
"metadata": {
@@ -204,7 +301,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
"version": "3.8.3"
}
},
"nbformat": 4,

tests/test_fincal.py (4 lines changed)

@@ -4,8 +4,10 @@ import random
from typing import Literal, Sequence
import pytest
from fincal.core import DateNotFoundError, FincalOptions, Frequency, Series
from fincal.core import Frequency, Series
from fincal.exceptions import DateNotFoundError
from fincal.fincal import TimeSeries, create_date_series
from fincal.utils import FincalOptions
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sample_data_path = os.path.join(THIS_DIR, "data")
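
A hypothetical test along these lines could pin down the new module layout; the data and test name below are illustrative and not part of this commit:

```python
import datetime

import pytest

from fincal.exceptions import DateNotFoundError
from fincal.utils import _find_closest_date


def test_find_closest_date_raises_for_missing_date():
    data = {datetime.datetime(2022, 1, 31): 310.98}  # illustrative data
    missing = datetime.datetime(2022, 2, 1)
    # With a zero delta and if_not_found="fail", the helper raises the
    # exception from its new module, fincal.exceptions.
    with pytest.raises(DateNotFoundError):
        _find_closest_date(data, missing, datetime.timedelta(0), "fail")
```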
