separated utility functions and exceptions in standalone files

This commit is contained in:
Gourav Kumar 2022-02-26 12:46:42 +05:30
parent ef2973a1d1
commit 7cac5cc307
6 changed files with 228 additions and 125 deletions

View File

@ -2,13 +2,9 @@ import datetime
from collections import UserDict, UserList
from dataclasses import dataclass
from numbers import Number
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union
from typing import Iterable, List, Literal, Sequence
@dataclass
class FincalOptions:
date_format: str = "%Y-%m-%d"
closest: str = "before" # after
from .utils import _parse_date, _preprocess_timeseries
@dataclass(frozen=True)
@ -29,114 +25,6 @@ class AllFrequencies:
Y = Frequency("annual", "years", 1, 365, "Y")
class DateNotFoundError(Exception):
"""Exception to be raised when date is not found"""
def __init__(self, message, date):
message = f"{message}: {date}"
super().__init__(message)
def _parse_date(date: str, date_format: str = None):
"""Parses date and handles errors"""
if isinstance(date, (datetime.datetime, datetime.date)):
return datetime.datetime.fromordinal(date.toordinal())
if date_format is None:
date_format = FincalOptions.date_format
try:
date = datetime.datetime.strptime(date, date_format)
except TypeError:
raise ValueError("Date does not seem to be valid date-like string")
except ValueError:
raise ValueError("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?")
return date
def _preprocess_timeseries(
data: Union[
Sequence[Iterable[Union[str, datetime.datetime, float]]],
Sequence[Mapping[str, Union[float, datetime.datetime]]],
Sequence[Mapping[Union[str, datetime.datetime], float]],
Mapping[Union[str, datetime.datetime], float],
],
date_format: str,
) -> List[Tuple[datetime.datetime, float]]:
"""Converts any type of list to the correct type"""
if isinstance(data, Mapping):
current_data = [(k, v) for k, v in data.items()]
return _preprocess_timeseries(current_data, date_format)
if not isinstance(data, Sequence):
raise TypeError("Could not parse the data")
if isinstance(data[0], Sequence):
return sorted([(_parse_date(i, date_format), j) for i, j in data])
if not isinstance(data[0], Mapping):
raise TypeError("Could not parse the data")
if len(data[0]) == 1:
current_data = [tuple(*i.items()) for i in data]
elif len(data[0]) == 2:
current_data = [tuple(i.values()) for i in data]
else:
raise TypeError("Could not parse the data")
return _preprocess_timeseries(current_data, date_format)
def _preprocess_match_options(as_on_match: str, prior_match: str, closest: str) -> datetime.timedelta:
"""Checks the arguments and returns appropriate timedelta objects"""
deltas = {"exact": 0, "previous": -1, "next": 1}
if closest not in deltas.keys():
raise ValueError(f"Invalid argument for closest: {closest}")
as_on_match = closest if as_on_match == "closest" else as_on_match
prior_match = closest if prior_match == "closest" else prior_match
if as_on_match in deltas.keys():
as_on_delta = datetime.timedelta(days=deltas[as_on_match])
else:
raise ValueError(f"Invalid as_on_match argument: {as_on_match}")
if prior_match in deltas.keys():
prior_delta = datetime.timedelta(days=deltas[prior_match])
else:
raise ValueError(f"Invalid prior_match argument: {prior_match}")
return as_on_delta, prior_delta
def _find_closest_date(data, date, delta, if_not_found):
"""Helper function to find data for the closest available date"""
row = data.get(date, None)
if row is not None:
return date, row
if delta:
return _find_closest_date(data, date + delta, delta, if_not_found)
if if_not_found == "fail":
raise DateNotFoundError("Data not found for date", date)
if if_not_found == "nan":
return date, float("NaN")
raise ValueError(f"Invalid argument for if_not_found: {if_not_found}")
def _interval_to_years(interval_type: Literal["years", "months", "day"], interval_value: int) -> int:
"""Converts any time period to years for use with compounding functions"""
year_conversion_factor = {"years": 1, "months": 12, "days": 365}
years = interval_value / year_conversion_factor[interval_type]
return years
class _IndexSlicer:
"""Class to create a slice using iloc in TimeSeriesCore"""

6
fincal/exceptions.py Normal file
View File

@ -0,0 +1,6 @@
class DateNotFoundError(Exception):
"""Exception to be raised when date is not found"""
def __init__(self, message, date):
message = f"{message}: {date}"
super().__init__(message)

View File

@ -5,9 +5,8 @@ from typing import List, Literal, Union
from dateutil.relativedelta import relativedelta
from .core import (
AllFrequencies,
TimeSeriesCore,
from .core import AllFrequencies, TimeSeriesCore
from .utils import (
_find_closest_date,
_interval_to_years,
_parse_date,

111
fincal/utils.py Normal file
View File

@ -0,0 +1,111 @@
import datetime
from dataclasses import dataclass
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union
from .exceptions import DateNotFoundError
@dataclass
class FincalOptions:
date_format: str = "%Y-%m-%d"
closest: str = "before" # after
def _parse_date(date: str, date_format: str = None):
"""Parses date and handles errors"""
if isinstance(date, (datetime.datetime, datetime.date)):
return datetime.datetime.fromordinal(date.toordinal())
if date_format is None:
date_format = FincalOptions.date_format
try:
date = datetime.datetime.strptime(date, date_format)
except TypeError:
raise ValueError("Date does not seem to be valid date-like string")
except ValueError:
raise ValueError("Date could not be parsed. Have you set the correct date format in FincalOptions.date_format?")
return date
def _preprocess_timeseries(
data: Union[
Sequence[Iterable[Union[str, datetime.datetime, float]]],
Sequence[Mapping[str, Union[float, datetime.datetime]]],
Sequence[Mapping[Union[str, datetime.datetime], float]],
Mapping[Union[str, datetime.datetime], float],
],
date_format: str,
) -> List[Tuple[datetime.datetime, float]]:
"""Converts any type of list to the correct type"""
if isinstance(data, Mapping):
current_data = [(k, v) for k, v in data.items()]
return _preprocess_timeseries(current_data, date_format)
if not isinstance(data, Sequence):
raise TypeError("Could not parse the data")
if isinstance(data[0], Sequence):
return sorted([(_parse_date(i, date_format), j) for i, j in data])
if not isinstance(data[0], Mapping):
raise TypeError("Could not parse the data")
if len(data[0]) == 1:
current_data = [tuple(*i.items()) for i in data]
elif len(data[0]) == 2:
current_data = [tuple(i.values()) for i in data]
else:
raise TypeError("Could not parse the data")
return _preprocess_timeseries(current_data, date_format)
def _preprocess_match_options(as_on_match: str, prior_match: str, closest: str) -> datetime.timedelta:
"""Checks the arguments and returns appropriate timedelta objects"""
deltas = {"exact": 0, "previous": -1, "next": 1}
if closest not in deltas.keys():
raise ValueError(f"Invalid argument for closest: {closest}")
as_on_match = closest if as_on_match == "closest" else as_on_match
prior_match = closest if prior_match == "closest" else prior_match
if as_on_match in deltas.keys():
as_on_delta = datetime.timedelta(days=deltas[as_on_match])
else:
raise ValueError(f"Invalid as_on_match argument: {as_on_match}")
if prior_match in deltas.keys():
prior_delta = datetime.timedelta(days=deltas[prior_match])
else:
raise ValueError(f"Invalid prior_match argument: {prior_match}")
return as_on_delta, prior_delta
def _find_closest_date(data, date, delta, if_not_found):
"""Helper function to find data for the closest available date"""
row = data.get(date, None)
if row is not None:
return date, row
if delta:
return _find_closest_date(data, date + delta, delta, if_not_found)
if if_not_found == "fail":
raise DateNotFoundError("Data not found for date", date)
if if_not_found == "nan":
return date, float("NaN")
raise ValueError(f"Invalid argument for if_not_found: {if_not_found}")
def _interval_to_years(interval_type: Literal["years", "months", "day"], interval_value: int) -> int:
"""Converts any time period to years for use with compounding functions"""
year_conversion_factor = {"years": 1, "months": 12, "days": 365}
years = interval_value / year_conversion_factor[interval_type]
return years

View File

@ -20,11 +20,19 @@
"id": "4b8ccd5f-dfff-4202-82c4-f66a30c122b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 125 ms\n",
"Wall time: 99 ms\n"
]
},
{
"data": {
"text/plain": [
"[(datetime.datetime(2022, 1, 31, 0, 0), 310.980011),\n",
" (datetime.datetime(2021, 5, 28, 0, 0), 249.679993)]"
" (datetime.datetime(2021, 5, 28, 0, 0), 249.67999300000002)]"
]
},
"execution_count": 2,
@ -33,6 +41,7 @@
}
],
"source": [
"%%time\n",
"dfd = pd.read_csv('test_files/msft.csv')\n",
"# dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)\n",
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n",
@ -40,6 +49,27 @@
"ts[['2022-01-31', '2021-05-28']]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "ffd9665d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(datetime.datetime(2022, 1, 31, 0, 0), 310.980011)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts['2022-01-31']"
]
},
{
"cell_type": "code",
"execution_count": 3,
@ -50,7 +80,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 17 ms\n"
"CPU times: total: 15.6 ms\n",
"Wall time: 16 ms\n"
]
},
{
@ -86,7 +117,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 5.97 ms\n"
"CPU times: total: 15.6 ms\n",
"Wall time: 4 ms\n"
]
},
{
@ -95,7 +127,7 @@
"[(datetime.datetime(1992, 2, 19, 0, 0), 2.398438),\n",
" (datetime.datetime(1992, 2, 20, 0, 0), 2.447917),\n",
" (datetime.datetime(1992, 2, 21, 0, 0), 2.385417),\n",
" (datetime.datetime(1992, 2, 24, 0, 0), 2.393229),\n",
" (datetime.datetime(1992, 2, 24, 0, 0), 2.3932290000000003),\n",
" (datetime.datetime(1992, 2, 25, 0, 0), 2.411458),\n",
" (datetime.datetime(1992, 2, 26, 0, 0), 2.541667),\n",
" (datetime.datetime(1992, 2, 27, 0, 0), 2.601563),\n",
@ -116,7 +148,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 11,
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {
"tags": []
@ -126,7 +158,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 311 ms\n"
"CPU times: total: 297 ms\n",
"Wall time: 290 ms\n"
]
}
],
@ -186,6 +219,70 @@
"sr = Series([1, 2, 3, 4, 5], 'number')\n",
"sr"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "adceda69",
"metadata": {},
"outputs": [],
"source": [
"from fincal.fincal import TimeSeries\n",
"import datetime\n",
"ts = TimeSeries(data = [('2021-01-01', 220), ('2021-02-01', 230), ('2021-03-01', 240)], frequency='M')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "68cf9f8c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(datetime.datetime(2021, 2, 1, 0, 0), 0.045454545454545414)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts.calculate_returns('2021-02-05', interval_type='months', interval_value=1, compounding=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a583347f",
"metadata": {},
"outputs": [],
"source": [
"D = {'a': 1, 'b': 2}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f79ac787",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['a', 'b'])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"D.keys()"
]
}
],
"metadata": {
@ -204,7 +301,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
"version": "3.8.3"
}
},
"nbformat": 4,

View File

@ -4,8 +4,10 @@ import random
from typing import Literal, Sequence
import pytest
from fincal.core import DateNotFoundError, FincalOptions, Frequency, Series
from fincal.core import Frequency, Series
from fincal.exceptions import DateNotFoundError
from fincal.fincal import TimeSeries, create_date_series
from fincal.utils import FincalOptions
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sample_data_path = os.path.join(THIS_DIR, "data")