Compare commits

...

5 Commits

  1. 82
      fincal/core.py
  2. 175
      testing.ipynb
  3. 49
      tests/test_core.py
  4. 56
      tests/test_fincal.py

82
fincal/core.py

@ -2,7 +2,7 @@ import datetime
from collections import UserDict, UserList
from dataclasses import dataclass
from numbers import Number
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Type, Union
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union
@dataclass
@ -138,20 +138,36 @@ class Series(UserList):
def __init__(
self,
data,
data_type: Union[Type[bool], Type[float], Type[str], Type[datetime.datetime]],
data_type: Literal['date', 'number', 'bool'],
date_format: str = None,
):
self.dtype = data_type
types_dict = {
'date': datetime.datetime,
'datetime': datetime.datetime,
'datetime.datetime': datetime.datetime,
'float': float,
'int': float,
'number': float,
'bool': bool
}
if data_type not in types_dict.keys():
raise ValueError("Unsupported value for data type")
if not isinstance(data, Sequence):
raise TypeError("Series object can only be created using Sequence types")
for i in data:
if not isinstance(i, data_type):
raise Exception("All arguments must be of the same type")
if data_type == str:
if data_type in ['date', 'datetime', 'datetime.datetime']:
data = [_parse_date(i, date_format) for i in data]
else:
func = types_dict[data_type]
data = [func(i) for i in data]
# elif data_type == 'number':
# data = [float(i) for i in data]
# elif data_type == 'boolean':
# data = [bool(i) for i in data]
self.dtype = types_dict[data_type]
self.data = data
def __repr__(self):
@ -159,7 +175,7 @@ class Series(UserList):
def __getitem__(self, i):
if isinstance(i, slice):
return self.__class__(self.data[i], self.dtype)
return self.__class__(self.data[i], str(self.dtype.__name__))
else:
return self.data[i]
@ -171,25 +187,58 @@ class Series(UserList):
other = _parse_date(other)
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
gt = Series([i > other for i in self.data], bool)
gt = Series([i > other for i in self.data], 'bool')
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return gt
def __ge__(self, other):
    """Compare every element of the series with ``other`` using ``>=``.

    Returns:
        A Series built with the 'bool' data type holding one comparison
        result per element of ``self.data``.

    Raises:
        TypeError: if this series has the bool dtype.
        Exception: if ``other`` is neither a Number (for a float series)
            nor an instance of this series' dtype.
    """
    if self.dtype == bool:
        raise TypeError(">= not supported for boolean series")

    # Strings and date/datetime scalars are passed through _parse_date
    # before the type check below.
    if isinstance(other, (str, datetime.datetime, datetime.date)):
        other = _parse_date(other)

    numeric_match = self.dtype == float and isinstance(other, Number)
    if not (numeric_match or isinstance(other, self.dtype)):
        raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")

    return Series([item >= other for item in self.data], 'bool')
def __lt__(self, other):
if self.dtype == bool:
raise TypeError("< not supported for boolean series")
if isinstance(other, (str, datetime.datetime, datetime.date)):
other = _parse_date(other)
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
lt = Series([i < other for i in self.data], bool)
lt = Series([i < other for i in self.data], 'bool')
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return lt
def __le__(self, other):
    """Compare every element of the series with ``other`` using ``<=``.

    Returns:
        A Series built with the 'bool' data type holding one comparison
        result per element of ``self.data``.

    Raises:
        TypeError: if this series has the bool dtype.
        Exception: if ``other`` is neither a Number (for a float series)
            nor an instance of this series' dtype.
    """
    if self.dtype == bool:
        raise TypeError("<= not supported for boolean series")

    # Strings and date/datetime scalars are passed through _parse_date
    # before the type check below.
    if isinstance(other, (str, datetime.datetime, datetime.date)):
        other = _parse_date(other)

    numeric_match = self.dtype == float and isinstance(other, Number)
    if not (numeric_match or isinstance(other, self.dtype)):
        raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")

    return Series([item <= other for item in self.data], 'bool')
def __eq__(self, other):
if isinstance(other, (str, datetime.datetime, datetime.date)):
other = _parse_date(other)
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
eq = Series([i == other for i in self.data], bool)
eq = Series([i == other for i in self.data], 'bool')
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return eq
@ -237,14 +286,14 @@ class TimeSeriesCore(UserDict):
if self._dates is None or len(self._dates) != len(self.data):
self._dates = list(self.data.keys())
return Series(self._dates, datetime.datetime)
return Series(self._dates, 'date')
@property
def values(self):
if self._values is None or len(self._values) != len(self.data):
self._values = list(self.data.values())
return Series(self._values, float)
return Series(self._values, 'number')
@property
def start_date(self):
@ -304,7 +353,8 @@ class TimeSeriesCore(UserDict):
elif len(key) != len(self.dates):
raise Exception(f"Length of Series: {len(key)} did not match length of object: {len(self.dates)}")
else:
dates_to_return = [self.dates[i] for i, j in enumerate(key) if j]
dates = self.dates
dates_to_return = [dates[i] for i, j in enumerate(key) if j]
data_to_return = [(key, self.data[key]) for key in dates_to_return]
return self.__class__(data_to_return, frequency=self.frequency.symbol)

175
testing.ipynb

@ -19,35 +19,50 @@
"execution_count": 2,
"id": "4b8ccd5f-dfff-4202-82c4-f66a30c122b6",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"[(datetime.datetime(2022, 1, 31, 0, 0), 310.980011),\n",
" (datetime.datetime(2021, 5, 28, 0, 0), 249.679993)]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfd = pd.read_csv('test_files/nav_history_daily - copy.csv')\n",
"dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)"
"dfd = pd.read_csv('test_files/msft.csv')\n",
"# dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)\n",
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n",
"repr(ts)\n",
"ts[['2022-01-31', '2021-05-28']]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c52b0c2c-dd01-48dd-9ffa-3147ec9571ef",
"id": "086d4377-d1b1-4e51-84c0-39dee28ef75e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: The input data contains duplicate dates which have been ignored.\n"
"Wall time: 17 ms\n"
]
},
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n",
"\t (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n",
"\t (datetime.datetime(2013, 1, 4, 0, 0), 19.008)\n",
"TimeSeries([(datetime.datetime(2022, 1, 3, 0, 0), 334.75),\n",
"\t (datetime.datetime(2022, 1, 4, 0, 0), 329.01001),\n",
"\t (datetime.datetime(2022, 1, 5, 0, 0), 316.380005)\n",
"\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
"\t (datetime.datetime(2022, 2, 16, 0, 0), 299.5),\n",
"\t (datetime.datetime(2022, 2, 17, 0, 0), 290.730011),\n",
"\t (datetime.datetime(2022, 2, 18, 0, 0), 287.929993)], frequency='D')"
]
},
"execution_count": 3,
@ -56,21 +71,37 @@
}
],
"source": [
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n",
"ts"
"%%time\n",
"s = ts.dates >= '2022-01-01'\n",
"ts[s]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9e8ff6c6-3a36-435a-ba87-5b9844c18779",
"id": "e815edc9-3746-4192-814e-bd27b2771a0c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 5.97 ms\n"
]
},
{
"data": {
"text/plain": [
"[(datetime.datetime(2022, 1, 31, 0, 0), 85.18),\n",
" (datetime.datetime(2021, 5, 31, 0, 0), 74.85)]"
"[(datetime.datetime(1992, 2, 19, 0, 0), 2.398438),\n",
" (datetime.datetime(1992, 2, 20, 0, 0), 2.447917),\n",
" (datetime.datetime(1992, 2, 21, 0, 0), 2.385417),\n",
" (datetime.datetime(1992, 2, 24, 0, 0), 2.393229),\n",
" (datetime.datetime(1992, 2, 25, 0, 0), 2.411458),\n",
" (datetime.datetime(1992, 2, 26, 0, 0), 2.541667),\n",
" (datetime.datetime(1992, 2, 27, 0, 0), 2.601563),\n",
" (datetime.datetime(1992, 2, 28, 0, 0), 2.572917),\n",
" (datetime.datetime(1992, 3, 2, 0, 0), 2.5625),\n",
" (datetime.datetime(1992, 3, 3, 0, 0), 2.567708)]"
]
},
"execution_count": 4,
@ -79,126 +110,82 @@
}
],
"source": [
"ts[['2022-01-31', '2021-05-31']]"
"%%time\n",
"ts.iloc[:10]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "086d4377-d1b1-4e51-84c0-39dee28ef75e",
"metadata": {},
"execution_count": 5,
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2021, 2, 15, 0, 0), 73.483),\n",
"\t (datetime.datetime(2021, 2, 16, 0, 0), 73.237),\n",
"\t (datetime.datetime(2021, 2, 17, 0, 0), 72.98)\n",
"\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 311 ms\n"
]
}
],
"source": [
"ts[ts.dates>'2021-02-14']"
"%%time\n",
"from_date = datetime.date(1994, 1, 1)\n",
"to_date = datetime.date(2022, 1, 1)\n",
"# print(ts.calculate_returns(to_date, years=7))\n",
"rr = ts.calculate_rolling_returns(from_date, to_date)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "6f1226a3-2327-435b-88e7-fd0fdcc8cc1c",
"execution_count": 6,
"id": "e5d357b4-4fe5-4a0a-8107-0ab6828d7c41",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2020, 1, 2, 0, 0), 58.285),\n",
"\t (datetime.datetime(2020, 1, 3, 0, 0), 58.056999999999995),\n",
"\t (datetime.datetime(2020, 1, 6, 0, 0), 56.938)\n",
"TimeSeries([(datetime.datetime(1994, 1, 3, 0, 0), -0.06149359306648605),\n",
"\t (datetime.datetime(1994, 1, 4, 0, 0), -0.05433177603118022),\n",
"\t (datetime.datetime(1994, 1, 5, 0, 0), -0.04913276300578029)\n",
"\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
"\t (datetime.datetime(2021, 12, 29, 0, 0), 0.5255410267822715),\n",
"\t (datetime.datetime(2021, 12, 30, 0, 0), 0.5306749265370103),\n",
"\t (datetime.datetime(2021, 12, 31, 0, 0), 0.5120942811985818)], frequency='D')"
]
},
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s = ts.dates > '2020-01-01'\n",
"ts[s]"
"rr"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e815edc9-3746-4192-814e-bd27b2771a0c",
"execution_count": 7,
"id": "4bad2efa",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n",
" (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n",
" (datetime.datetime(2013, 1, 4, 0, 0), 19.008),\n",
" (datetime.datetime(2013, 1, 7, 0, 0), 18.95),\n",
" (datetime.datetime(2013, 1, 8, 0, 0), 18.954),\n",
" (datetime.datetime(2013, 1, 9, 0, 0), 18.94),\n",
" (datetime.datetime(2013, 1, 10, 0, 0), 18.957),\n",
" (datetime.datetime(2013, 1, 11, 0, 0), 18.948),\n",
" (datetime.datetime(2013, 1, 14, 0, 0), 19.177),\n",
" (datetime.datetime(2013, 1, 15, 0, 0), 19.272000000000002)]"
"Series([1.0, 2.0, 3.0, 4.0, 5.0])"
]
},
"execution_count": 9,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts.iloc[:10]"
"sr = Series([1, 2, 3, 4, 5], 'number')\n",
"sr"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 15.6 ms\n",
"Wall time: 14 ms\n"
]
}
],
"source": [
"%%time\n",
"from_date = datetime.date(2020, 1, 1)\n",
"to_date = datetime.date(2021, 1, 1)\n",
"# print(ts.calculate_returns(to_date, years=7))\n",
"rr = ts.calculate_rolling_returns(from_date, to_date)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5d357b4-4fe5-4a0a-8107-0ab6828d7c41",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@ -217,7 +204,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
"version": "3.9.2"
}
},
"nbformat": 4,

49
tests/test_core.py

@ -0,0 +1,49 @@
import datetime
from fincal.core import AllFrequencies, Frequency, Series
from fincal.fincal import create_date_series
class TestFrequency:
    """Checks the attributes exposed by a directly constructed Frequency."""

    def test_creation(self):
        """Build a Frequency positionally and verify each attribute it reports."""
        daily = Frequency('daily', 'days', 1, 1, 'D')
        expected = {
            'days': 1,
            'symbol': 'D',
            'name': 'daily',
            'value': 1,
            'freq_type': 'days',
        }
        for attribute, wanted in expected.items():
            assert getattr(daily, attribute) == wanted
class TestAllFrequencies:
    """Spot checks on the predefined frequencies available on AllFrequencies."""

    def test_attributes(self):
        """The D, M and Q frequencies are present."""
        for symbol in ('D', 'M', 'Q'):
            assert hasattr(AllFrequencies, symbol)

    def test_days(self):
        """Each checked frequency reports the expected ``days`` value."""
        expected_days = {'D': 1, 'M': 30, 'Q': 91}
        for symbol, days in expected_days.items():
            assert getattr(AllFrequencies, symbol).days == days

    def test_symbol(self):
        """A frequency's ``symbol`` equals the attribute name it is stored under."""
        for symbol in ('H', 'W'):
            assert getattr(AllFrequencies, symbol).symbol == symbol

    def test_values(self):
        """Each checked frequency reports the expected ``value``."""
        expected_values = {'H': 6, 'Y': 1}
        for symbol, value in expected_values.items():
            assert getattr(AllFrequencies, symbol).value == value

    def test_type(self):
        """Each checked frequency reports the expected ``freq_type``."""
        expected_types = {'Q': 'months', 'W': 'days'}
        for symbol, freq_type in expected_types.items():
            assert getattr(AllFrequencies, symbol).freq_type == freq_type
class TestSeries:
    """Construction tests for Series using its string data-type names."""

    def test_creation(self):
        """Create a numeric and a date Series and verify the resulting dtype."""
        # Series expects the data type as a string key ('number', 'date',
        # 'bool', ...); passing a Python type such as `int` is rejected with
        # ValueError("Unsupported value for data type").
        series = Series([1, 2, 3, 4, 5, 6, 7], data_type='number')
        assert series.dtype == float
        assert series[2] == 3

        dates = create_date_series('2021-01-01', '2021-01-31', 'D')
        series = Series(dates, data_type='date')
        # dtype is assigned per instance in Series.__init__, so the check
        # must be made on the instance rather than on the Series class.
        assert series.dtype == datetime.datetime

56
tests/test_fincal.py

@ -4,7 +4,7 @@ import random
from typing import Literal, Sequence
import pytest
from fincal.core import Frequency
from fincal.core import Frequency, Series
from fincal.fincal import TimeSeries, create_date_series
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
@ -18,8 +18,8 @@ def create_test_data(
gaps: float,
month_position: Literal["start", "middle", "end"],
date_as_str: bool,
as_outer_type: Literal['dict', 'list'] = 'list',
as_inner_type: Literal['dict', 'list', 'tuple'] = 'tuple'
as_outer_type: Literal["dict", "list"] = "list",
as_inner_type: Literal["dict", "list", "tuple"] = "tuple",
) -> Sequence[tuple]:
start_dates = {
"start": datetime.datetime(2016, 1, 1),
@ -35,19 +35,19 @@ def create_test_data(
for i in to_remove:
dates.remove(i)
if date_as_str:
dates = [i.strftime('%Y-%m-%d') for i in dates]
dates = [i.strftime("%Y-%m-%d") for i in dates]
values = [random.randint(8000, 90000)/100 for _ in dates]
values = [random.randint(8000, 90000) / 100 for _ in dates]
data = list(zip(dates, values))
if as_outer_type == 'list':
if as_inner_type == 'list':
if as_outer_type == "list":
if as_inner_type == "list":
data = [list(i) for i in data]
elif as_inner_type == 'dict[1]':
elif as_inner_type == "dict[1]":
data = [dict((i,)) for i in data]
elif as_inner_type == 'dict[2]':
elif as_inner_type == "dict[2]":
data = [dict(date=i, value=j) for i, j in data]
elif as_outer_type == 'dict':
elif as_outer_type == "dict":
data = dict(data)
return data
@ -117,7 +117,7 @@ class TestDateSeries:
class TestFincal:
def test_creation(self):
data = create_test_data(frequency='D', eomonth=False, n=50, gaps=0, month_position='start', date_as_str=True)
data = create_test_data(frequency="D", eomonth=False, n=50, gaps=0, month_position="start", date_as_str=True)
time_series = TimeSeries(data, frequency="D")
assert len(time_series) == 50
assert isinstance(time_series.frequency, Frequency)
@ -126,26 +126,36 @@ class TestFincal:
ffill_data = time_series.ffill()
assert len(ffill_data) == 50
data = create_test_data(frequency='D', eomonth=False, n=500, gaps=0.1, month_position='start', date_as_str=True)
data = create_test_data(frequency="D", eomonth=False, n=500, gaps=0.1, month_position="start", date_as_str=True)
time_series = TimeSeries(data, frequency="D")
assert len(time_series) == 450
def test_ffill(self):
data = create_test_data(frequency='D', eomonth=False, n=500, gaps=0.1, month_position='start', date_as_str=True)
data = create_test_data(frequency="D", eomonth=False, n=500, gaps=0.1, month_position="start", date_as_str=True)
time_series = TimeSeries(data, frequency="D")
ffill_data = time_series.ffill()
assert len(ffill_data) > 498
assert len(ffill_data) >= 498
ffill_data = time_series.ffill(inplace=True)
assert ffill_data is None
assert len(time_series) > 498
assert len(time_series) >= 498
def test_slicing(self):
data = create_test_data(frequency='D', eomonth=False, n=50, gaps=0, month_position='start', date_as_str=True)
def test_iloc_slicing(self):
data = create_test_data(frequency="D", eomonth=False, n=50, gaps=0, month_position="start", date_as_str=True)
time_series = TimeSeries(data, frequency="D")
assert time_series[0] is not None
assert time_series[:3] is not None
assert time_series[5:7] is not None
assert isinstance(time_series[0], tuple)
assert isinstance(time_series[10:20], list)
assert len(time_series[10:20]) == 10
assert time_series.iloc[0] is not None
assert time_series.iloc[:3] is not None
assert time_series.iloc[5:7] is not None
assert isinstance(time_series.iloc[0], tuple)
assert isinstance(time_series.iloc[10:20], list)
assert len(time_series.iloc[10:20]) == 10
def test_key_slicing(self):
    """Look up a TimeSeries by an existing date and by the 'dates'/'values' keys."""
    expected_length = 50
    raw = create_test_data(frequency="D", eomonth=False, n=50, gaps=0, month_position="start", date_as_str=True)
    time_series = TimeSeries(raw, frequency="D")

    # Take the first element of the sixth positional entry and use it as a key.
    sixth_entry = time_series.iloc[5]
    assert time_series[sixth_entry[0]] is not None

    # The string keys "dates" and "values" each return a Series.
    for key in ("dates", "values"):
        assert isinstance(time_series[key], Series)

    assert len(time_series.dates) == expected_length
    assert len(time_series.values) == expected_length

Loading…
Cancel
Save