Compare commits

..

5 Commits

Author SHA1 Message Date
cd05250e8a now testing using Jupyter Notebook 2022-02-21 08:22:04 +05:30
47e11546a0 Created Series class for slicing support 2022-02-20 22:49:45 +05:30
141ec97a2c converted dates and values to properties
ts['dates'] and ts['values'] keys implemted
2022-02-20 21:52:33 +05:30
439fa86b5c ffill and bfill now return TimeSeries objects instead of lists 2022-02-20 21:51:54 +05:30
2c1d508734 implemented iterator 2022-02-20 21:36:44 +05:30
3 changed files with 246 additions and 5 deletions

View File

@ -1,5 +1,6 @@
import datetime
from dataclasses import dataclass
from numbers import Number
from typing import Iterable, List, Literal, Mapping, Sequence, Tuple, Union
@ -111,6 +112,67 @@ class IndexSlicer:
return item
class Series:
def __init__(self, data):
if not isinstance(data, Sequence):
raise TypeError("Series only supports creation using Sequence types")
if isinstance(data[0], bool):
self.data = data
self.dtype = bool
elif isinstance(data[0], Number):
self.dtype = float
self.data = [float(i) for i in data]
elif isinstance(data[0], str):
try:
data = [datetime.datetime.strptime(i, FincalOptions.date_format) for i in data]
self.dtype = datetime.datetime
except ValueError:
raise TypeError("Series does not support string data type")
elif isinstance(data[0], datetime.datetime):
self.dtype = datetime.datetime
self.data = data
else:
raise TypeError(f"Cannot create series object from {type(data).__name__} of {type(data[0]).__name__}")
def __repr__(self):
return f"{self.__class__.__name__}({self.data})"
def __getitem__(self, n):
return self.data[n]
def __len__(self):
return len(self.data)
def __gt__(self, other):
if self.dtype == bool:
raise TypeError("> not supported for boolean series")
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
gt = Series([i > other for i in self.data])
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return gt
def __lt__(self, other):
if self.dtype == bool:
raise TypeError("< not supported for boolean series")
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
lt = Series([i < other for i in self.data])
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return lt
def __eq__(self, other):
if self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
eq = Series([i == other for i in self.data])
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return eq
class TimeSeriesCore:
"""Defines the core building blocks of a TimeSeries object"""
@ -142,12 +204,36 @@ class TimeSeriesCore:
data = _preprocess_timeseries(data, date_format=date_format)
self.time_series = dict(data)
self.dates = list(self.time_series)
if len(self.time_series) != len(data):
print("Warning: The input data contains duplicate dates which have been ignored.")
self.start_date = self.dates[0]
self.end_date = self.dates[-1]
self.frequency = getattr(AllFrequencies, frequency)
self.iter_num = -1
self._dates = None
self._values = None
self._start_date = None
self._end_date = None
@property
def dates(self):
if self._dates is None or len(self._dates) != len(self.time_series):
self._dates = list(self.time_series.keys())
return Series(self._dates)
@property
def values(self):
if self._values is None or len(self._values) != len(self.time_series):
self._values = list(self.time_series.values())
return Series(self._values)
@property
def start_date(self):
return self.dates[0]
@property
def end_date(self):
return self.dates[-1]
def _get_printable_slice(self, n: int):
"""Returns a slice of the dataframe from beginning and end"""
@ -193,11 +279,25 @@ class TimeSeriesCore:
return printable_str
def __getitem__(self, key):
if isinstance(key, Series):
if not key.dtype == bool:
raise ValueError(f"Cannot slice {self.__class__.__name__} using a Series of {key.dtype.__name__}")
elif len(key) != len(self.dates):
raise Exception(f"Length of Series: {len(key)} did not match length of object: {len(self.dates)}")
else:
dates_to_return = [self.dates[i] for i, j in enumerate(key) if j]
data_to_return = [(key, self.time_series[key]) for key in dates_to_return]
return TimeSeriesCore(data_to_return)
if isinstance(key, int):
raise KeyError(f"{key}. For index based slicing, use .iloc[{key}]")
elif isinstance(key, datetime.datetime):
item = (key, self.time_series[key])
if isinstance(key, str):
if key == 'dates':
return self.dates
elif key == 'values':
return list(self.time_series.values())
try:
dt_key = datetime.datetime.strptime(key, FincalOptions.date_format)
item = (dt_key, self.time_series[dt_key])
@ -215,6 +315,18 @@ class TimeSeriesCore:
def __len__(self):
return len(self.time_series)
def __iter__(self):
self.n = 0
return self
def __next__(self):
if self.n >= len(self.dates):
raise StopIteration
else:
key = self.dates[self.n]
self.n += 1
return key, self.time_series[key]
def head(self, n: int = 6):
"""Returns the first n items of the TimeSeries object"""

View File

@ -75,7 +75,7 @@ class TimeSeries(TimeSeriesCore):
self.time_series = new_ts
return None
return new_ts
return TimeSeries(new_ts, frequency=self.frequency.symbol)
def bfill(self, inplace: bool = False, limit: int = None) -> Union[TimeSeries, None]:
"""Backward fill missing dates in the time series
@ -109,7 +109,7 @@ class TimeSeries(TimeSeriesCore):
self.time_series = new_ts
return None
return new_ts
return TimeSeries(new_ts, frequency=self.frequency.symbol)
def calculate_returns(
self,

129
testing.ipynb Normal file
View File

@ -0,0 +1,129 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "3f7938c0-98e3-43b8-86e8-4f000cda7ce5",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"import pandas as pd\n",
"\n",
"from fincal.fincal import TimeSeries\n",
"from fincal.core import Series"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "757eafc2-f804-4e7e-a3b8-2d09cd62e646",
"metadata": {},
"outputs": [],
"source": [
"dfd = pd.read_csv('test_files/nav_history_daily - copy.csv')"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "59b3d4a9-8ef4-4652-9e20-1bac69ab4ff9",
"metadata": {},
"outputs": [],
"source": [
"dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "4bc95ae0-8c33-4eab-acf9-e765d22979b8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: The input data contains duplicate dates which have been ignored.\n"
]
}
],
"source": [
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "f2c3218c-3984-43d6-8638-41a74a9d0b58",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n",
"\t (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n",
"\t (datetime.datetime(2013, 1, 4, 0, 0), 19.008)\n",
"\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.53299999999999)], frequency='D')"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "dc469722-c816-4b57-8d91-7a3b865f86be",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "getattr(): attribute name must be string",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m<timed eval>:1\u001b[0m, in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n",
"File \u001b[1;32mD:\\Documents\\Projects\\fincal\\fincal\\fincal.py:203\u001b[0m, in \u001b[0;36mTimeSeries.calculate_rolling_returns\u001b[1;34m(self, from_date, to_date, frequency, as_on_match, prior_match, closest, compounding, years)\u001b[0m\n\u001b[0;32m 200\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n\u001b[0;32m 201\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid argument for frequency \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfrequency\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 203\u001b[0m dates \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_date_series\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfrom_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mto_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 204\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m frequency \u001b[38;5;241m==\u001b[39m AllFrequencies\u001b[38;5;241m.\u001b[39mD:\n\u001b[0;32m 205\u001b[0m dates \u001b[38;5;241m=\u001b[39m [i \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m dates \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtime_series]\n",
"File \u001b[1;32mD:\\Documents\\Projects\\fincal\\fincal\\fincal.py:16\u001b[0m, in \u001b[0;36mcreate_date_series\u001b[1;34m(start_date, end_date, frequency, eomonth)\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate_date_series\u001b[39m(\n\u001b[0;32m 12\u001b[0m start_date: datetime\u001b[38;5;241m.\u001b[39mdatetime, end_date: datetime\u001b[38;5;241m.\u001b[39mdatetime, frequency: \u001b[38;5;28mstr\u001b[39m, eomonth: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 13\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[datetime\u001b[38;5;241m.\u001b[39mdatetime]:\n\u001b[0;32m 14\u001b[0m \u001b[38;5;124;03m\"\"\"Creates a date series using a frequency\"\"\"\u001b[39;00m\n\u001b[1;32m---> 16\u001b[0m frequency \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mAllFrequencies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m eomonth \u001b[38;5;129;01mand\u001b[39;00m frequency\u001b[38;5;241m.\u001b[39mdays \u001b[38;5;241m<\u001b[39m AllFrequencies\u001b[38;5;241m.\u001b[39mM\u001b[38;5;241m.\u001b[39mdays:\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meomonth cannot be set to True if frequency is higher than \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mAllFrequencies\u001b[38;5;241m.\u001b[39mM\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[1;31mTypeError\u001b[0m: getattr(): attribute name must be string"
]
}
],
"source": [
"%%time\n",
"ts.calculate_rolling_returns(from_date='2020-01-01', to_date='2021-01-01')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}