Compare commits

...

5 Commits

Author SHA1 Message Date
cd05250e8a now testing using Jupyter Notebook 2022-02-21 08:22:04 +05:30
47e11546a0 Created Series class for slicing support 2022-02-20 22:49:45 +05:30
141ec97a2c converted dates and values to properties
ts['dates'] and ts['values'] keys implemted
2022-02-20 21:52:33 +05:30
439fa86b5c ffill and bfill now return TimeSeries objects instead of lists 2022-02-20 21:51:54 +05:30
2c1d508734 implemented iterator 2022-02-20 21:36:44 +05:30
3 changed files with 246 additions and 5 deletions

View File

@ -1,5 +1,6 @@
import datetime import datetime
from dataclasses import dataclass from dataclasses import dataclass
from numbers import Number
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union
@ -111,6 +112,67 @@ class IndexSlicer:
return item return item
class Series:
def __init__(self, data):
if not isinstance(data, Sequence):
raise TypeError("Series only supports creation using Sequence types")
if isinstance(data[0], bool):
self.data = data
self.dtype = bool
elif isinstance(data[0], Number):
self.dtype = float
self.data = [float(i) for i in data]
elif isinstance(data[0], str):
try:
data = [datetime.datetime.strptime(i, FincalOptions.date_format) for i in data]
self.dtype = datetime.datetime
except ValueError:
raise TypeError("Series does not support string data type")
elif isinstance(data[0], datetime.datetime):
self.dtype = datetime.datetime
self.data = data
else:
raise TypeError(f"Cannot create series object from {type(data).__name__} of {type(data[0]).__name__}")
def __repr__(self):
return f"{self.__class__.__name__}({self.data})"
def __getitem__(self, n):
return self.data[n]
def __len__(self):
return len(self.data)
def __gt__(self, other):
if self.dtype == bool:
raise TypeError("> not supported for boolean series")
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
gt = Series([i > other for i in self.data])
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return gt
def __lt__(self, other):
if self.dtype == bool:
raise TypeError("< not supported for boolean series")
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
lt = Series([i < other for i in self.data])
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return lt
def __eq__(self, other):
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
eq = Series([i == other for i in self.data])
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return eq
class TimeSeriesCore: class TimeSeriesCore:
"""Defines the core building blocks of a TimeSeries object""" """Defines the core building blocks of a TimeSeries object"""
@ -142,12 +204,36 @@ class TimeSeriesCore:
data = _preprocess_timeseries(data, date_format=date_format) data = _preprocess_timeseries(data, date_format=date_format)
self.time_series = dict(data) self.time_series = dict(data)
self.dates = list(self.time_series)
if len(self.time_series) != len(data): if len(self.time_series) != len(data):
print("Warning: The input data contains duplicate dates which have been ignored.") print("Warning: The input data contains duplicate dates which have been ignored.")
self.start_date = self.dates[0]
self.end_date = self.dates[-1]
self.frequency = getattr(AllFrequencies, frequency) self.frequency = getattr(AllFrequencies, frequency)
self.iter_num = -1
self._dates = None
self._values = None
self._start_date = None
self._end_date = None
@property
def dates(self):
if self._dates is None or len(self._dates) != len(self.time_series):
self._dates = list(self.time_series.keys())
return Series(self._dates)
@property
def values(self):
if self._values is None or len(self._values) != len(self.time_series):
self._values = list(self.time_series.values())
return Series(self._values)
@property
def start_date(self):
return self.dates[0]
@property
def end_date(self):
return self.dates[-1]
def _get_printable_slice(self, n: int): def _get_printable_slice(self, n: int):
"""Returns a slice of the dataframe from beginning and end""" """Returns a slice of the dataframe from beginning and end"""
@ -193,11 +279,25 @@ class TimeSeriesCore:
return printable_str return printable_str
def __getitem__(self, key): def __getitem__(self, key):
if isinstance(key, Series):
if not key.dtype == bool:
raise ValueError(f"Cannot slice {self.__class__.__name__} using a Series of {key.dtype.__name__}")
elif len(key) != len(self.dates):
raise Exception(f"Length of Series: {len(key)} did not match length of object: {len(self.dates)}")
else:
dates_to_return = [self.dates[i] for i, j in enumerate(key) if j]
data_to_return = [(key, self.time_series[key]) for key in dates_to_return]
return TimeSeriesCore(data_to_return)
if isinstance(key, int): if isinstance(key, int):
raise KeyError(f"{key}. For index based slicing, use .iloc[{key}]") raise KeyError(f"{key}. For index based slicing, use .iloc[{key}]")
elif isinstance(key, datetime.datetime): elif isinstance(key, datetime.datetime):
item = (key, self.time_series[key]) item = (key, self.time_series[key])
if isinstance(key, str): if isinstance(key, str):
if key == 'dates':
return self.dates
elif key == 'values':
return list(self.time_series.values())
try: try:
dt_key = datetime.datetime.strptime(key, FincalOptions.date_format) dt_key = datetime.datetime.strptime(key, FincalOptions.date_format)
item = (dt_key, self.time_series[dt_key]) item = (dt_key, self.time_series[dt_key])
@ -215,6 +315,18 @@ class TimeSeriesCore:
def __len__(self): def __len__(self):
return len(self.time_series) return len(self.time_series)
def __iter__(self):
self.n = 0
return self
def __next__(self):
if self.n >= len(self.dates):
raise StopIteration
else:
key = self.dates[self.n]
self.n += 1
return key, self.time_series[key]
def head(self, n: int = 6): def head(self, n: int = 6):
"""Returns the first n items of the TimeSeries object""" """Returns the first n items of the TimeSeries object"""

View File

@ -75,7 +75,7 @@ class TimeSeries(TimeSeriesCore):
self.time_series = new_ts self.time_series = new_ts
return None return None
return new_ts return TimeSeries(new_ts, frequency=self.frequency.symbol)
def bfill(self, inplace: bool = False, limit: int = None) -> Union[TimeSeries, None]: def bfill(self, inplace: bool = False, limit: int = None) -> Union[TimeSeries, None]:
"""Backward fill missing dates in the time series """Backward fill missing dates in the time series
@ -109,7 +109,7 @@ class TimeSeries(TimeSeriesCore):
self.time_series = new_ts self.time_series = new_ts
return None return None
return new_ts return TimeSeries(new_ts, frequency=self.frequency.symbol)
def calculate_returns( def calculate_returns(
self, self,

129
testing.ipynb Normal file
View File

@ -0,0 +1,129 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "3f7938c0-98e3-43b8-86e8-4f000cda7ce5",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"import pandas as pd\n",
"\n",
"from fincal.fincal import TimeSeries\n",
"from fincal.core import Series"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "757eafc2-f804-4e7e-a3b8-2d09cd62e646",
"metadata": {},
"outputs": [],
"source": [
"dfd = pd.read_csv('test_files/nav_history_daily - copy.csv')"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "59b3d4a9-8ef4-4652-9e20-1bac69ab4ff9",
"metadata": {},
"outputs": [],
"source": [
"dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "4bc95ae0-8c33-4eab-acf9-e765d22979b8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: The input data contains duplicate dates which have been ignored.\n"
]
}
],
"source": [
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "f2c3218c-3984-43d6-8638-41a74a9d0b58",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n",
"\t (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n",
"\t (datetime.datetime(2013, 1, 4, 0, 0), 19.008)\n",
"\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "getattr(): attribute name must be string",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m<timed eval>:1\u001b[0m, in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n",
"File \u001b[1;32mD:\\Documents\\Projects\\fincal\\fincal\\fincal.py:203\u001b[0m, in \u001b[0;36mTimeSeries.calculate_rolling_returns\u001b[1;34m(self, from_date, to_date, frequency, as_on_match, prior_match, closest, compounding, years)\u001b[0m\n\u001b[0;32m 200\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n\u001b[0;32m 201\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid argument for frequency \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfrequency\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 203\u001b[0m dates \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_date_series\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfrom_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mto_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 204\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m frequency \u001b[38;5;241m==\u001b[39m AllFrequencies\u001b[38;5;241m.\u001b[39mD:\n\u001b[0;32m 205\u001b[0m dates \u001b[38;5;241m=\u001b[39m [i \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m dates \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtime_series]\n",
"File \u001b[1;32mD:\\Documents\\Projects\\fincal\\fincal\\fincal.py:16\u001b[0m, in \u001b[0;36mcreate_date_series\u001b[1;34m(start_date, end_date, frequency, eomonth)\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate_date_series\u001b[39m(\n\u001b[0;32m 12\u001b[0m start_date: datetime\u001b[38;5;241m.\u001b[39mdatetime, end_date: datetime\u001b[38;5;241m.\u001b[39mdatetime, frequency: \u001b[38;5;28mstr\u001b[39m, eomonth: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 13\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[datetime\u001b[38;5;241m.\u001b[39mdatetime]:\n\u001b[0;32m 14\u001b[0m \u001b[38;5;124;03m\"\"\"Creates a date series using a frequency\"\"\"\u001b[39;00m\n\u001b[1;32m---> 16\u001b[0m frequency \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mAllFrequencies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m eomonth \u001b[38;5;129;01mand\u001b[39;00m frequency\u001b[38;5;241m.\u001b[39mdays \u001b[38;5;241m<\u001b[39m AllFrequencies\u001b[38;5;241m.\u001b[39mM\u001b[38;5;241m.\u001b[39mdays:\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meomonth cannot be set to True if frequency is higher than \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mAllFrequencies\u001b[38;5;241m.\u001b[39mM\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[1;31mTypeError\u001b[0m: getattr(): attribute name must be string"
]
}
],
"source": [
"%%time\n",
"ts.calculate_rolling_returns(from_date='2020-01-01', to_date='2021-01-01')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}