Compare commits

...

7 Commits

10 changed files with 15288 additions and 30 deletions

3
.gitignore vendored
View File

@ -2,4 +2,5 @@
.eggs
.env
*egg-info
__pycache__
__pycache__
.vscode

View File

@ -138,17 +138,17 @@ class Series(UserList):
def __init__(
self,
data,
data_type: Literal['date', 'number', 'bool'],
data_type: Literal["date", "number", "bool"],
date_format: str = None,
):
types_dict = {
'date': datetime.datetime,
'datetime': datetime.datetime,
'datetime.datetime': datetime.datetime,
'float': float,
'int': float,
'number': float,
'bool': bool
"date": datetime.datetime,
"datetime": datetime.datetime,
"datetime.datetime": datetime.datetime,
"float": float,
"int": float,
"number": float,
"bool": bool,
}
if data_type not in types_dict.keys():
@ -157,21 +157,17 @@ class Series(UserList):
if not isinstance(data, Sequence):
raise TypeError("Series object can only be created using Sequence types")
if data_type in ['date', 'datetime', 'datetime.datetime']:
if data_type in ["date", "datetime", "datetime.datetime"]:
data = [_parse_date(i, date_format) for i in data]
else:
func = types_dict[data_type]
data = [func(i) for i in data]
# elif data_type == 'number':
# data = [float(i) for i in data]
# elif data_type == 'boolean':
# data = [bool(i) for i in data]
self.dtype = types_dict[data_type]
self.data = data
def __repr__(self):
return f"{self.__class__.__name__}({self.data})"
return f"{self.__class__.__name__}({self.data}, data_type='{self.dtype.__name__}')"
def __getitem__(self, i):
if isinstance(i, slice):
@ -187,7 +183,7 @@ class Series(UserList):
other = _parse_date(other)
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
gt = Series([i > other for i in self.data], 'bool')
gt = Series([i > other for i in self.data], "bool")
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
@ -201,7 +197,7 @@ class Series(UserList):
other = _parse_date(other)
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
ge = Series([i >= other for i in self.data], 'bool')
ge = Series([i >= other for i in self.data], "bool")
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
@ -215,7 +211,7 @@ class Series(UserList):
other = _parse_date(other)
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
lt = Series([i < other for i in self.data], 'bool')
lt = Series([i < other for i in self.data], "bool")
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return lt
@ -228,7 +224,7 @@ class Series(UserList):
other = _parse_date(other)
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
le = Series([i <= other for i in self.data], 'bool')
le = Series([i <= other for i in self.data], "bool")
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return le
@ -238,7 +234,7 @@ class Series(UserList):
other = _parse_date(other)
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
eq = Series([i == other for i in self.data], 'bool')
eq = Series([i == other for i in self.data], "bool")
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return eq
@ -286,14 +282,14 @@ class TimeSeriesCore(UserDict):
if self._dates is None or len(self._dates) != len(self.data):
self._dates = list(self.data.keys())
return Series(self._dates, 'date')
return Series(self._dates, "date")
@property
def values(self):
if self._values is None or len(self._values) != len(self.data):
self._values = list(self.data.values())
return Series(self._values, 'number')
return Series(self._values, "number")
@property
def start_date(self):
@ -391,6 +387,10 @@ class TimeSeriesCore(UserDict):
self.n += 1
return key, self.data[key]
def __contains__(self, key: object) -> bool:
    """Report whether ``key`` is present in this object.

    The key is first passed through ``_parse_date`` and the parsed result is
    then handed to the parent class's ``__contains__`` for the actual lookup.
    """
    parsed_key = _parse_date(key)
    return super().__contains__(parsed_key)
def head(self, n: int = 6):
"""Returns the first n items of the TimeSeries object"""
@ -407,6 +407,9 @@ class TimeSeriesCore(UserDict):
result = [(key, self.data[key]) for key in keys]
return result
def items(self):
    """Return the items view of the underlying ``self.data`` mapping."""
    pairs = self.data.items()
    return pairs
@property
def iloc(self):
"""Returns an item or a set of items based on index"""

View File

@ -17,6 +17,8 @@ def create_date_series(
if eomonth and frequency.days < AllFrequencies.M.days:
raise ValueError(f"eomonth cannot be set to True if frequency is higher than {AllFrequencies.M.name}")
start_date = _parse_date(start_date)
end_date = _parse_date(end_date)
datediff = (end_date - start_date).days / frequency.days + 1
dates = []

View File

@ -1,16 +1,19 @@
import datetime
import time
import timeit
import pandas
from fincal.fincal import AllFrequencies, Frequency, TimeSeries, create_date_series
from fincal.fincal import AllFrequencies, TimeSeries, create_date_series
dfd = pandas.read_csv('test_files/nav_history_daily - Copy.csv')
dfd = pandas.read_csv('test_files/msft.csv')
dfm = pandas.read_csv('test_files/nav_history_monthly.csv')
dfq = pandas.read_csv('test_files/nav_history_quarterly.csv')
data_d = [(i.date, i.nav) for i in dfd.itertuples()]
data_m = [{'date': i.date, 'value': i.nav} for i in dfm.itertuples()]
data_q = {i.date: i.nav for i in dfq.itertuples()}
data_q.update({'14-02-2022': 93.7})
tsd = TimeSeries(data_d, frequency='D')
tsm = TimeSeries(data_m, frequency='M', date_format='%d-%m-%Y')
@ -18,6 +21,6 @@ tsq = TimeSeries(data_q, frequency='Q', date_format='%d-%m-%Y')
start = time.time()
# ts.calculate_rolling_returns(datetime.datetime(2015, 1, 1), datetime.datetime(2022, 2, 1), years=1)
# fdata = tsd.ffill()
bdata = tsq.bfill()
# rr = tsd.calculate_rolling_returns(datetime.datetime(2022, 1, 1), datetime.datetime(2022, 2, 1), years=1)
print(time.time() - start)

35
test.py Normal file
View File

@ -0,0 +1,35 @@
# type: ignore
"""Manual timing script for TimeSeries construction and rolling returns.

Reads ``test_files/nav_history_daily.csv``, keeps the rows of a single
``amfi_code``, and prints how long instantiation and the rolling-return
calculation take, followed by the first ten entries of the sorted result.
"""
import datetime
import time

import pandas as pd

from fincal.fincal import TimeSeries

# Guarded so that importing this module does not trigger file I/O and a
# long-running calculation; running it as a script behaves as before.
if __name__ == "__main__":
    df = pd.read_csv('test_files/nav_history_daily.csv')
    df = df.sort_values(by=['amfi_code', 'date'])  # type: ignore
    data_list = [(i.date, i.nav) for i in df[df.amfi_code == 118825].itertuples()]

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() can jump if the system clock is adjusted.
    start = time.perf_counter()
    ts_data = TimeSeries(data_list, frequency='M')
    print(f"Instantiation took {round((time.perf_counter() - start)*1000, 2)} ms")
    # ts_data.fill_missing_days()

    start = time.perf_counter()
    # ts_data.calculate_returns(as_on=datetime.datetime(2022, 1, 4), closest='next', years=1)
    rr = ts_data.calculate_rolling_returns(
        datetime.datetime(2015, 1, 1),
        datetime.datetime(2022, 1, 21),
        frequency='M',
        as_on_match='next',
        prior_match='previous',
        closest='previous',
        years=1,
    )
    # ffill_data = ts_data.bfill()
    print(f"Calculation took {round((time.perf_counter() - start)*1000, 2)} ms")

    rr.sort()
    for i in rr[:10]:
        print(i)
    # print(ffill_data)
    # print(ts_data)
    # print(repr(ts_data))

37
test2.py Normal file
View File

@ -0,0 +1,37 @@
# type: ignore
# Manual timing script: loads test_files/msft.csv, builds a daily TimeSeries
# from the Date/Close columns and prints how long instantiation and the
# rolling-return calculation take, then the first ten sorted results.
if __name__ == "__main__":
    import datetime
    import time

    import pandas as pd

    from fincal.fincal import TimeSeries

    df = pd.read_csv('test_files/msft.csv')
    df = df.sort_values(by='Date')  # type: ignore
    data_list = [(i.Date, i.Close) for i in df.itertuples()]

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() can jump if the system clock is adjusted.
    start = time.perf_counter()
    ts_data = TimeSeries(data_list, frequency='D', date_format='%d-%m-%Y')
    print(f"Instantiation took {round((time.perf_counter() - start)*1000, 2)} ms")
    # ts_data.fill_missing_days()

    start = time.perf_counter()
    # ts_data.calculate_returns(as_on=datetime.datetime(2022, 1, 4), closest='next', years=1)
    rr = ts_data.calculate_rolling_returns(
        datetime.datetime(1994, 1, 1),
        datetime.datetime(2022, 2, 17),
        frequency='D',
        as_on_match='next',
        prior_match='previous',
        closest='previous',
        years=1,
    )
    # ffill_data = ts_data.bfill()
    print(f"Calculation took {round((time.perf_counter() - start)*1000, 2)} ms")

    rr.sort()
    for i in rr[:10]:
        print(i)
    # print(ffill_data)
    # print(ts_data)
    # print(repr(ts_data))

7560
test_files/msft.csv Normal file

File diff suppressed because it is too large Load Diff

25
test_series.py Normal file
View File

@ -0,0 +1,25 @@
"""Scratch script that builds one numeric and one date ``Series``."""
import datetime

from fincal.core import Series

# Series.__init__ takes ``data_type`` as a required argument, so it must be
# passed explicitly; omitting it raises TypeError before any data is parsed.
s1 = Series([2.5, 6.2, 5.6, 8.4, 7.4, 1.5, 9.6, 5], data_type='number')

dt_lst = [
    datetime.datetime(2020, 12, 4, 0, 0),
    datetime.datetime(2019, 5, 16, 0, 0),
    datetime.datetime(2019, 9, 25, 0, 0),
    datetime.datetime(2016, 2, 18, 0, 0),
    datetime.datetime(2017, 8, 14, 0, 0),
    datetime.datetime(2018, 1, 4, 0, 0),
    datetime.datetime(2017, 5, 21, 0, 0),
    datetime.datetime(2018, 7, 17, 0, 0),
    datetime.datetime(2016, 4, 8, 0, 0),
    datetime.datetime(2020, 1, 7, 0, 0),
    datetime.datetime(2016, 12, 24, 0, 0),
    datetime.datetime(2020, 6, 19, 0, 0),
    datetime.datetime(2016, 3, 16, 0, 0),
    datetime.datetime(2017, 4, 25, 0, 0),
    datetime.datetime(2016, 7, 10, 0, 0),
]

s2 = Series(dt_lst, data_type='date')

7560
tests/data/msft.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
import datetime
from typing import Mapping
from fincal.core import AllFrequencies, Frequency, Series
from fincal.core import AllFrequencies, Frequency, Series, TimeSeriesCore
from fincal.fincal import create_date_series
@ -40,10 +41,41 @@ class TestAllFrequencies:
class TestSeries:
def test_creation(self):
series = Series([1, 2, 3, 4, 5, 6, 7], data_type=int)
series = Series([1, 2, 3, 4, 5, 6, 7], data_type='number')
assert series.dtype == float
assert series[2] == 3
dates = create_date_series('2021-01-01', '2021-01-31', 'D')
series = Series(dates, data_type=datetime.datetime)
assert Series.dtype == datetime.datetime
dates = create_date_series('2021-01-01', '2021-01-31', frequency='D')
series = Series(dates, data_type='date')
assert series.dtype == datetime.datetime
class TestTimeSeriesCore:
    """Checks construction, lookup, membership and item iteration of TimeSeriesCore."""

    # Three monthly (date string, value) observations shared by every test.
    data = [('2021-01-01', 220), ('2021-02-01', 230), ('2021-03-01', 240)]

    def _build(self):
        """Create a fresh monthly TimeSeriesCore from the shared sample data."""
        return TimeSeriesCore(self.data, frequency='M')

    def test_creation(self):
        """The constructed object is a TimeSeriesCore and also a Mapping."""
        series = self._build()
        assert isinstance(series, TimeSeriesCore)
        assert isinstance(series, Mapping)

    def test_getitem(self):
        """Date keys, boolean masks and ``iloc`` all retrieve the expected data."""
        series = self._build()

        # Positional access on the dates / values views.
        assert series.dates[0] == datetime.datetime(2021, 1, 1, 0, 0)
        assert series.values[0] == 220

        # Lookup by date string, then by boolean masks built from the dates.
        assert series['2021-01-01'][1] == 220
        assert len(series[series.dates > '2021-01-01']) == 2
        assert series[series.dates == '2021-02-01'].iloc[0][1] == 230

        # Integer and slice access through iloc.
        assert series.iloc[2][0] == datetime.datetime(2021, 3, 1)
        assert len(series.iloc[:2]) == 2

    def test_contains(self):
        """Membership accepts datetime objects as well as date strings."""
        series = self._build()
        assert datetime.datetime(2021, 1, 1) in series
        assert '2021-01-01' in series
        assert '2021-01-14' not in series

    def test_items(self):
        """The first pair yielded by ``items()`` carries the first sample value."""
        series = self._build()
        expected_first_value = self.data[0][1]
        for _, value in series.items():
            assert value == expected_first_value
            break