Compare commits

...

5 Commits

  1. 82
      fincal/core.py
  2. 175
      testing.ipynb
  3. 49
      tests/test_core.py
  4. 56
      tests/test_fincal.py

82
fincal/core.py

@ -2,7 +2,7 @@ import datetime
from collections import UserDict, UserList from collections import UserDict, UserList
from dataclasses import dataclass from dataclasses import dataclass
from numbers import Number from numbers import Number
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Type, Union from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union
@dataclass @dataclass
@ -138,20 +138,36 @@ class Series(UserList):
def __init__( def __init__(
self, self,
data, data,
data_type: Union[Type[bool], Type[float], Type[str], Type[datetime.datetime]], data_type: Literal['date', 'number', 'bool'],
date_format: str = None, date_format: str = None,
): ):
self.dtype = data_type types_dict = {
'date': datetime.datetime,
'datetime': datetime.datetime,
'datetime.datetime': datetime.datetime,
'float': float,
'int': float,
'number': float,
'bool': bool
}
if data_type not in types_dict.keys():
raise ValueError("Unsupported value for data type")
if not isinstance(data, Sequence): if not isinstance(data, Sequence):
raise TypeError("Series object can only be created using Sequence types") raise TypeError("Series object can only be created using Sequence types")
for i in data: if data_type in ['date', 'datetime', 'datetime.datetime']:
if not isinstance(i, data_type):
raise Exception("All arguments must be of the same type")
if data_type == str:
data = [_parse_date(i, date_format) for i in data] data = [_parse_date(i, date_format) for i in data]
else:
func = types_dict[data_type]
data = [func(i) for i in data]
# elif data_type == 'number':
# data = [float(i) for i in data]
# elif data_type == 'boolean':
# data = [bool(i) for i in data]
self.dtype = types_dict[data_type]
self.data = data self.data = data
def __repr__(self): def __repr__(self):
@ -159,7 +175,7 @@ class Series(UserList):
def __getitem__(self, i): def __getitem__(self, i):
if isinstance(i, slice): if isinstance(i, slice):
return self.__class__(self.data[i], self.dtype) return self.__class__(self.data[i], str(self.dtype.__name__))
else: else:
return self.data[i] return self.data[i]
@ -171,25 +187,58 @@ class Series(UserList):
other = _parse_date(other) other = _parse_date(other)
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype): if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
gt = Series([i > other for i in self.data], bool) gt = Series([i > other for i in self.data], 'bool')
else: else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}") raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return gt return gt
def __ge__(self, other):
    """Compare every element with ``other`` using ``>=``.

    Args:
        other: Scalar to compare against. ``str``, ``datetime.datetime`` and
            ``datetime.date`` values are first passed through ``_parse_date``.

    Returns:
        A ``Series`` built with data type ``'bool'`` holding one comparison
        result per element of ``self.data``.

    Raises:
        TypeError: If this series has ``dtype`` ``bool``.
        Exception: If ``other`` is neither a ``Number`` (for a float series)
            nor an instance of ``self.dtype``.
    """
    if self.dtype == bool:
        raise TypeError(">= not supported for boolean series")

    # Date-like scalars are normalised before the type check below.
    date_like = (str, datetime.datetime, datetime.date)
    if isinstance(other, date_like):
        other = _parse_date(other)

    # A float series accepts any Number; otherwise the scalar must match dtype.
    numeric_match = self.dtype == float and isinstance(other, Number)
    if not (numeric_match or isinstance(other, self.dtype)):
        raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")

    return Series([item >= other for item in self.data], 'bool')
def __lt__(self, other): def __lt__(self, other):
if self.dtype == bool: if self.dtype == bool:
raise TypeError("< not supported for boolean series") raise TypeError("< not supported for boolean series")
if isinstance(other, (str, datetime.datetime, datetime.date)):
other = _parse_date(other)
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype): if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
lt = Series([i < other for i in self.data], bool) lt = Series([i < other for i in self.data], 'bool')
else: else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}") raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return lt return lt
def __le__(self, other):
    """Compare every element with ``other`` using ``<=``.

    Args:
        other: Scalar to compare against. ``str``, ``datetime.datetime`` and
            ``datetime.date`` values are first passed through ``_parse_date``.

    Returns:
        A ``Series`` built with data type ``'bool'`` holding one comparison
        result per element of ``self.data``.

    Raises:
        TypeError: If this series has ``dtype`` ``bool``.
        Exception: If ``other`` is neither a ``Number`` (for a float series)
            nor an instance of ``self.dtype``.
    """
    if self.dtype == bool:
        raise TypeError("<= not supported for boolean series")

    # Date-like scalars are normalised before the type check below.
    date_like = (str, datetime.datetime, datetime.date)
    if isinstance(other, date_like):
        other = _parse_date(other)

    # A float series accepts any Number; otherwise the scalar must match dtype.
    numeric_match = self.dtype == float and isinstance(other, Number)
    if not (numeric_match or isinstance(other, self.dtype)):
        raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")

    return Series([item <= other for item in self.data], 'bool')
def __eq__(self, other): def __eq__(self, other):
if isinstance(other, (str, datetime.datetime, datetime.date)):
other = _parse_date(other)
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype): if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
eq = Series([i == other for i in self.data], bool) eq = Series([i == other for i in self.data], 'bool')
else: else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}") raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return eq return eq
@ -237,14 +286,14 @@ class TimeSeriesCore(UserDict):
if self._dates is None or len(self._dates) != len(self.data): if self._dates is None or len(self._dates) != len(self.data):
self._dates = list(self.data.keys()) self._dates = list(self.data.keys())
return Series(self._dates, datetime.datetime) return Series(self._dates, 'date')
@property @property
def values(self): def values(self):
if self._values is None or len(self._values) != len(self.data): if self._values is None or len(self._values) != len(self.data):
self._values = list(self.data.values()) self._values = list(self.data.values())
return Series(self._values, float) return Series(self._values, 'number')
@property @property
def start_date(self): def start_date(self):
@ -304,7 +353,8 @@ class TimeSeriesCore(UserDict):
elif len(key) != len(self.dates): elif len(key) != len(self.dates):
raise Exception(f"Length of Series: {len(key)} did not match length of object: {len(self.dates)}") raise Exception(f"Length of Series: {len(key)} did not match length of object: {len(self.dates)}")
else: else:
dates_to_return = [self.dates[i] for i, j in enumerate(key) if j] dates = self.dates
dates_to_return = [dates[i] for i, j in enumerate(key) if j]
data_to_return = [(key, self.data[key]) for key in dates_to_return] data_to_return = [(key, self.data[key]) for key in dates_to_return]
return self.__class__(data_to_return, frequency=self.frequency.symbol) return self.__class__(data_to_return, frequency=self.frequency.symbol)

175
testing.ipynb

@ -19,35 +19,50 @@
"execution_count": 2, "execution_count": 2,
"id": "4b8ccd5f-dfff-4202-82c4-f66a30c122b6", "id": "4b8ccd5f-dfff-4202-82c4-f66a30c122b6",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"[(datetime.datetime(2022, 1, 31, 0, 0), 310.980011),\n",
" (datetime.datetime(2021, 5, 28, 0, 0), 249.679993)]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"dfd = pd.read_csv('test_files/nav_history_daily - copy.csv')\n", "dfd = pd.read_csv('test_files/msft.csv')\n",
"dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)" "# dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)\n",
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n",
"repr(ts)\n",
"ts[['2022-01-31', '2021-05-28']]"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 3,
"id": "c52b0c2c-dd01-48dd-9ffa-3147ec9571ef", "id": "086d4377-d1b1-4e51-84c0-39dee28ef75e",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Warning: The input data contains duplicate dates which have been ignored.\n" "Wall time: 17 ms\n"
] ]
}, },
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"TimeSeries([(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n", "TimeSeries([(datetime.datetime(2022, 1, 3, 0, 0), 334.75),\n",
"\t (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n", "\t (datetime.datetime(2022, 1, 4, 0, 0), 329.01001),\n",
"\t (datetime.datetime(2013, 1, 4, 0, 0), 19.008)\n", "\t (datetime.datetime(2022, 1, 5, 0, 0), 316.380005)\n",
"\t ...\n", "\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n", "\t (datetime.datetime(2022, 2, 16, 0, 0), 299.5),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n", "\t (datetime.datetime(2022, 2, 17, 0, 0), 290.730011),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')" "\t (datetime.datetime(2022, 2, 18, 0, 0), 287.929993)], frequency='D')"
] ]
}, },
"execution_count": 3, "execution_count": 3,
@ -56,21 +71,37 @@
} }
], ],
"source": [ "source": [
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n", "%%time\n",
"ts" "s = ts.dates >= '2022-01-01'\n",
"ts[s]"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 4,
"id": "9e8ff6c6-3a36-435a-ba87-5b9844c18779", "id": "e815edc9-3746-4192-814e-bd27b2771a0c",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 5.97 ms\n"
]
},
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[(datetime.datetime(2022, 1, 31, 0, 0), 85.18),\n", "[(datetime.datetime(1992, 2, 19, 0, 0), 2.398438),\n",
" (datetime.datetime(2021, 5, 31, 0, 0), 74.85)]" " (datetime.datetime(1992, 2, 20, 0, 0), 2.447917),\n",
" (datetime.datetime(1992, 2, 21, 0, 0), 2.385417),\n",
" (datetime.datetime(1992, 2, 24, 0, 0), 2.393229),\n",
" (datetime.datetime(1992, 2, 25, 0, 0), 2.411458),\n",
" (datetime.datetime(1992, 2, 26, 0, 0), 2.541667),\n",
" (datetime.datetime(1992, 2, 27, 0, 0), 2.601563),\n",
" (datetime.datetime(1992, 2, 28, 0, 0), 2.572917),\n",
" (datetime.datetime(1992, 3, 2, 0, 0), 2.5625),\n",
" (datetime.datetime(1992, 3, 3, 0, 0), 2.567708)]"
] ]
}, },
"execution_count": 4, "execution_count": 4,
@ -79,126 +110,82 @@
} }
], ],
"source": [ "source": [
"ts[['2022-01-31', '2021-05-31']]" "%%time\n",
"ts.iloc[:10]"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 5,
"id": "086d4377-d1b1-4e51-84c0-39dee28ef75e", "id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [ "outputs": [
{ {
"data": { "name": "stdout",
"text/plain": [ "output_type": "stream",
"TimeSeries([(datetime.datetime(2021, 2, 15, 0, 0), 73.483),\n", "text": [
"\t (datetime.datetime(2021, 2, 16, 0, 0), 73.237),\n", "Wall time: 311 ms\n"
"\t (datetime.datetime(2021, 2, 17, 0, 0), 72.98)\n", ]
"\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
"ts[ts.dates>'2021-02-14']" "%%time\n",
"from_date = datetime.date(1994, 1, 1)\n",
"to_date = datetime.date(2022, 1, 1)\n",
"# print(ts.calculate_returns(to_date, years=7))\n",
"rr = ts.calculate_rolling_returns(from_date, to_date)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 6,
"id": "6f1226a3-2327-435b-88e7-fd0fdcc8cc1c", "id": "e5d357b4-4fe5-4a0a-8107-0ab6828d7c41",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"TimeSeries([(datetime.datetime(2020, 1, 2, 0, 0), 58.285),\n", "TimeSeries([(datetime.datetime(1994, 1, 3, 0, 0), -0.06149359306648605),\n",
"\t (datetime.datetime(2020, 1, 3, 0, 0), 58.056999999999995),\n", "\t (datetime.datetime(1994, 1, 4, 0, 0), -0.05433177603118022),\n",
"\t (datetime.datetime(2020, 1, 6, 0, 0), 56.938)\n", "\t (datetime.datetime(1994, 1, 5, 0, 0), -0.04913276300578029)\n",
"\t ...\n", "\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n", "\t (datetime.datetime(2021, 12, 29, 0, 0), 0.5255410267822715),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n", "\t (datetime.datetime(2021, 12, 30, 0, 0), 0.5306749265370103),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')" "\t (datetime.datetime(2021, 12, 31, 0, 0), 0.5120942811985818)], frequency='D')"
] ]
}, },
"execution_count": 5, "execution_count": 6,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"s = ts.dates > '2020-01-01'\n", "rr"
"ts[s]"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 7,
"id": "e815edc9-3746-4192-814e-bd27b2771a0c", "id": "4bad2efa",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n", "Series([1.0, 2.0, 3.0, 4.0, 5.0])"
" (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n",
" (datetime.datetime(2013, 1, 4, 0, 0), 19.008),\n",
" (datetime.datetime(2013, 1, 7, 0, 0), 18.95),\n",
" (datetime.datetime(2013, 1, 8, 0, 0), 18.954),\n",
" (datetime.datetime(2013, 1, 9, 0, 0), 18.94),\n",
" (datetime.datetime(2013, 1, 10, 0, 0), 18.957),\n",
" (datetime.datetime(2013, 1, 11, 0, 0), 18.948),\n",
" (datetime.datetime(2013, 1, 14, 0, 0), 19.177),\n",
" (datetime.datetime(2013, 1, 15, 0, 0), 19.272000000000002)]"
] ]
}, },
"execution_count": 9, "execution_count": 7,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"ts.iloc[:10]" "sr = Series([1, 2, 3, 4, 5], 'number')\n",
"sr"
] ]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 15.6 ms\n",
"Wall time: 14 ms\n"
]
}
],
"source": [
"%%time\n",
"from_date = datetime.date(2020, 1, 1)\n",
"to_date = datetime.date(2021, 1, 1)\n",
"# print(ts.calculate_returns(to_date, years=7))\n",
"rr = ts.calculate_rolling_returns(from_date, to_date)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5d357b4-4fe5-4a0a-8107-0ab6828d7c41",
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {
@ -217,7 +204,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.3" "version": "3.9.2"
} }
}, },
"nbformat": 4, "nbformat": 4,

49
tests/test_core.py

@ -0,0 +1,49 @@
import datetime
from fincal.core import AllFrequencies, Frequency, Series
from fincal.fincal import create_date_series
class TestFrequency:
    """Checks that a hand-built ``Frequency`` exposes the values it was given."""

    def test_creation(self):
        """Each attribute of the new object matches the constructor argument."""
        daily = Frequency('daily', 'days', 1, 1, 'D')

        # Attributes are read in the same order as the individual checks they replace.
        observed = (daily.days, daily.symbol, daily.name, daily.value, daily.freq_type)
        assert observed == (1, 'D', 'daily', 1, 'days')
class TestAllFrequencies:
    """Checks on the predefined frequencies exposed as ``AllFrequencies`` attributes."""

    def test_attributes(self):
        """The ``D``, ``M`` and ``Q`` attributes are present."""
        for symbol in ('D', 'M', 'Q'):
            assert hasattr(AllFrequencies, symbol)

    def test_days(self):
        """``days`` holds the expected day count for D, M and Q."""
        expected_days = {'D': 1, 'M': 30, 'Q': 91}
        for symbol, days in expected_days.items():
            assert getattr(AllFrequencies, symbol).days == days

    def test_symbol(self):
        """``symbol`` matches the attribute name for H and W."""
        for symbol in ('H', 'W'):
            assert getattr(AllFrequencies, symbol).symbol == symbol

    def test_values(self):
        """``value`` holds the expected number for H and Y."""
        expected_values = {'H': 6, 'Y': 1}
        for symbol, value in expected_values.items():
            assert getattr(AllFrequencies, symbol).value == value

    def test_type(self):
        """``freq_type`` holds the expected unit name for Q and W."""
        expected_types = {'Q': 'months', 'W': 'days'}
        for symbol, freq_type in expected_types.items():
            assert getattr(AllFrequencies, symbol).freq_type == freq_type
class TestSeries:
    """Construction checks for ``Series``."""

    def test_creation(self):
        """Numeric and date input each produce the expected ``dtype``."""
        # Series takes the data type as a string key ('number', 'date', 'bool', ...);
        # passing the ``int`` class itself is rejected with ValueError.
        series = Series([1, 2, 3, 4, 5, 6, 7], data_type='number')
        assert series.dtype == float
        assert series[2] == 3

        # NOTE(review): assumes create_date_series returns a Sequence of dates
        # that Series can parse -- confirm against fincal.fincal.
        dates = create_date_series('2021-01-01', '2021-01-31', 'D')
        series = Series(dates, data_type='date')
        # dtype is assigned per instance in __init__, so check the instance,
        # not the Series class (which has no such attribute).
        assert series.dtype == datetime.datetime

56
tests/test_fincal.py

@ -4,7 +4,7 @@ import random
from typing import Literal, Sequence from typing import Literal, Sequence
import pytest import pytest
from fincal.core import Frequency from fincal.core import Frequency, Series
from fincal.fincal import TimeSeries, create_date_series from fincal.fincal import TimeSeries, create_date_series
THIS_DIR = os.path.dirname(os.path.abspath(__file__)) THIS_DIR = os.path.dirname(os.path.abspath(__file__))
@ -18,8 +18,8 @@ def create_test_data(
gaps: float, gaps: float,
month_position: Literal["start", "middle", "end"], month_position: Literal["start", "middle", "end"],
date_as_str: bool, date_as_str: bool,
as_outer_type: Literal['dict', 'list'] = 'list', as_outer_type: Literal["dict", "list"] = "list",
as_inner_type: Literal['dict', 'list', 'tuple'] = 'tuple' as_inner_type: Literal["dict", "list", "tuple"] = "tuple",
) -> Sequence[tuple]: ) -> Sequence[tuple]:
start_dates = { start_dates = {
"start": datetime.datetime(2016, 1, 1), "start": datetime.datetime(2016, 1, 1),
@ -35,19 +35,19 @@ def create_test_data(
for i in to_remove: for i in to_remove:
dates.remove(i) dates.remove(i)
if date_as_str: if date_as_str:
dates = [i.strftime('%Y-%m-%d') for i in dates] dates = [i.strftime("%Y-%m-%d") for i in dates]
values = [random.randint(8000, 90000)/100 for _ in dates] values = [random.randint(8000, 90000) / 100 for _ in dates]
data = list(zip(dates, values)) data = list(zip(dates, values))
if as_outer_type == 'list': if as_outer_type == "list":
if as_inner_type == 'list': if as_inner_type == "list":
data = [list(i) for i in data] data = [list(i) for i in data]
elif as_inner_type == 'dict[1]': elif as_inner_type == "dict[1]":
data = [dict((i,)) for i in data] data = [dict((i,)) for i in data]
elif as_inner_type == 'dict[2]': elif as_inner_type == "dict[2]":
data = [dict(date=i, value=j) for i, j in data] data = [dict(date=i, value=j) for i, j in data]
elif as_outer_type == 'dict': elif as_outer_type == "dict":
data = dict(data) data = dict(data)
return data return data
@ -117,7 +117,7 @@ class TestDateSeries:
class TestFincal: class TestFincal:
def test_creation(self): def test_creation(self):
data = create_test_data(frequency='D', eomonth=False, n=50, gaps=0, month_position='start', date_as_str=True) data = create_test_data(frequency="D", eomonth=False, n=50, gaps=0, month_position="start", date_as_str=True)
time_series = TimeSeries(data, frequency="D") time_series = TimeSeries(data, frequency="D")
assert len(time_series) == 50 assert len(time_series) == 50
assert isinstance(time_series.frequency, Frequency) assert isinstance(time_series.frequency, Frequency)
@ -126,26 +126,36 @@ class TestFincal:
ffill_data = time_series.ffill() ffill_data = time_series.ffill()
assert len(ffill_data) == 50 assert len(ffill_data) == 50
data = create_test_data(frequency='D', eomonth=False, n=500, gaps=0.1, month_position='start', date_as_str=True) data = create_test_data(frequency="D", eomonth=False, n=500, gaps=0.1, month_position="start", date_as_str=True)
time_series = TimeSeries(data, frequency="D") time_series = TimeSeries(data, frequency="D")
assert len(time_series) == 450 assert len(time_series) == 450
def test_ffill(self): def test_ffill(self):
data = create_test_data(frequency='D', eomonth=False, n=500, gaps=0.1, month_position='start', date_as_str=True) data = create_test_data(frequency="D", eomonth=False, n=500, gaps=0.1, month_position="start", date_as_str=True)
time_series = TimeSeries(data, frequency="D") time_series = TimeSeries(data, frequency="D")
ffill_data = time_series.ffill() ffill_data = time_series.ffill()
assert len(ffill_data) > 498 assert len(ffill_data) >= 498
ffill_data = time_series.ffill(inplace=True) ffill_data = time_series.ffill(inplace=True)
assert ffill_data is None assert ffill_data is None
assert len(time_series) > 498 assert len(time_series) >= 498
def test_slicing(self): def test_iloc_slicing(self):
data = create_test_data(frequency='D', eomonth=False, n=50, gaps=0, month_position='start', date_as_str=True) data = create_test_data(frequency="D", eomonth=False, n=50, gaps=0, month_position="start", date_as_str=True)
time_series = TimeSeries(data, frequency="D") time_series = TimeSeries(data, frequency="D")
assert time_series[0] is not None assert time_series.iloc[0] is not None
assert time_series[:3] is not None assert time_series.iloc[:3] is not None
assert time_series[5:7] is not None assert time_series.iloc[5:7] is not None
assert isinstance(time_series[0], tuple) assert isinstance(time_series.iloc[0], tuple)
assert isinstance(time_series[10:20], list) assert isinstance(time_series.iloc[10:20], list)
assert len(time_series[10:20]) == 10 assert len(time_series.iloc[10:20]) == 10
def test_key_slicing(self):
data = create_test_data(frequency="D", eomonth=False, n=50, gaps=0, month_position="start", date_as_str=True)
time_series = TimeSeries(data, frequency="D")
available_date = time_series.iloc[5][0]
assert time_series[available_date] is not None
assert isinstance(time_series["dates"], Series)
assert isinstance(time_series["values"], Series)
assert len(time_series.dates) == 50
assert len(time_series.values) == 50

Loading…
Cancel
Save