Compare commits

...

4 Commits

Author SHA1 Message Date
03a8045400 implemented arithmetic dunder methods
Writing tests for the same is pending
2022-04-11 22:49:41 +05:30
625c9228e9 Docs and corrections based on tests 2022-04-11 22:19:29 +05:30
3ec5b06e83 Added tests for TSC comparisons 2022-04-11 22:19:17 +05:30
e8dbc16157 implemented comparison in TSC, improved comparisons in Series 2022-04-11 10:47:12 +05:30
2 changed files with 362 additions and 87 deletions

View File

@ -149,99 +149,72 @@ class Series(UserList):
else: else:
return self.data[i] return self.data[i]
def __gt__(self, other): def _comparison_validator(self, other):
if self.dtype == bool: """Validates other before making comparison"""
raise TypeError("> not supported for boolean series")
if isinstance(other, (str, datetime.datetime, datetime.date)): if isinstance(other, (str, datetime.datetime, datetime.date)):
other = _parse_date(other) other = _parse_date(other)
return other
if isinstance(other, Series): if self.dtype == bool:
raise TypeError("Comparison operation not supported for boolean series")
elif isinstance(other, Series):
if len(self) != len(other): if len(self) != len(other):
raise ValueError("Length of Series must be same for comparison") raise ValueError("Length of Series must be same for comparison")
gt = Series([j > other[i] for i, j in enumerate(self)], "bool") elif (self.dtype != float and isinstance(other, Number)) or not isinstance(other, self.dtype):
elif self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
gt = Series([i > other for i in self.data], "bool")
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}") raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return gt return other
def __gt__(self, other):
other = self._comparison_validator(other)
if isinstance(other, Series):
return Series([j > other[i] for i, j in enumerate(self)], "bool")
return Series([i > other for i in self.data], "bool")
def __ge__(self, other): def __ge__(self, other):
if self.dtype == bool: other = self._comparison_validator(other)
raise TypeError(">= not supported for boolean series")
if isinstance(other, (str, datetime.datetime, datetime.date)):
other = _parse_date(other)
if isinstance(other, Series): if isinstance(other, Series):
if len(self) != len(other): return Series([j >= other[i] for i, j in enumerate(self)], "bool")
raise ValueError("Length of Series must be same for comparison")
ge = Series([j >= other[i] for i, j in enumerate(self)], "bool") return Series([i >= other for i in self.data], "bool")
elif self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
ge = Series([i >= other for i in self.data], "bool")
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return ge
def __lt__(self, other): def __lt__(self, other):
if self.dtype == bool: other = self._comparison_validator(other)
raise TypeError("< not supported for boolean series")
if isinstance(other, (str, datetime.datetime, datetime.date)):
other = _parse_date(other)
if isinstance(other, Series): if isinstance(other, Series):
if len(self) != len(other): return Series([j < other[i] for i, j in enumerate(self)], "bool")
raise ValueError("Length of Series must be same for comparison")
lt = Series([j < other[i] for i, j in enumerate(self)], "bool") return Series([i < other for i in self.data], "bool")
elif self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
lt = Series([i < other for i in self.data], "bool")
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return lt
def __le__(self, other): def __le__(self, other):
if self.dtype == bool: other = self._comparison_validator(other)
raise TypeError("<= not supported for boolean series")
if isinstance(other, (str, datetime.datetime, datetime.date)):
other = _parse_date(other)
if isinstance(other, Series): if isinstance(other, Series):
if len(self) != len(other): return Series([j <= other[i] for i, j in enumerate(self)], "bool")
raise ValueError("Length of Series must be same for comparison")
le = Series([j <= other[i] for i, j in enumerate(self)], "bool") return Series([i <= other for i in self.data], "bool")
elif self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype):
le = Series([i <= other for i in self.data], "bool")
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}")
return le
def __eq__(self, other): def __eq__(self, other):
if isinstance(other, (str, datetime.datetime, datetime.date)): other = self._comparison_validator(other)
other = _parse_date(other)
if isinstance(other, Series): if isinstance(other, Series):
if len(self) != len(other): return Series([j == other[i] for i, j in enumerate(self)], "bool")
raise ValueError("Length of Series must be same for comparison")
eq = Series([j == other[i] for i, j in enumerate(self)], "bool") return Series([i == other for i in self.data], "bool")
elif self.dtype == float and isinstance(other, Number) or isinstance(other, self.dtype): def __ne__(self, other):
eq = Series([i == other for i in self.data], "bool") other = self._comparison_validator(other)
else:
raise Exception(f"Cannot compare type {self.dtype.__name__} to {type(other).__name__}") if isinstance(other, Series):
return eq return Series([j != other[i] for i, j in enumerate(self)], "bool")
return Series([i == other for i in self.data], "bool")
@Mapping.register @Mapping.register
@ -362,9 +335,13 @@ class TimeSeriesCore:
@date_parser(1) @date_parser(1)
def _get_item_from_date(self, date: str | datetime.datetime): def _get_item_from_date(self, date: str | datetime.datetime):
"""Helper function to retrieve item using a date"""
return date, self.data[date] return date, self.data[date]
def _get_item_from_key(self, key: str | datetime.datetime): def _get_item_from_key(self, key: str | datetime.datetime):
"""Helper function to implement special keys"""
if isinstance(key, int): if isinstance(key, int):
raise KeyError(f"{key}. \nHint: use .iloc[{key}] for index based slicing.") raise KeyError(f"{key}. \nHint: use .iloc[{key}] for index based slicing.")
@ -374,10 +351,17 @@ class TimeSeriesCore:
return self._get_item_from_date(key) return self._get_item_from_date(key)
def _get_item_from_list(self, date_list: Sequence[str | datetime.datetime]): def _get_item_from_list(self, date_list: Sequence[str | datetime.datetime]):
"""Helper function to retrieve items using a list"""
data_to_return = [self._get_item_from_key(key) for key in date_list] data_to_return = [self._get_item_from_key(key) for key in date_list]
return self.__class__(data_to_return, frequency=self.frequency.symbol) return self.__class__(data_to_return, frequency=self.frequency.symbol)
def _get_item_from_series(self, series: Series): def _get_item_from_series(self, series: Series):
"""Helper function to retrieve item using a Series object
A Series of type bool of equal length to the time series can be used.
A Series of dates can be used to filter out a set of dates.
"""
if series.dtype == bool: if series.dtype == bool:
if len(series) != len(self.dates): if len(series) != len(self.dates):
raise ValueError(f"Length of Series: {len(series)} did not match length of object: {len(self.dates)}") raise ValueError(f"Length of Series: {len(series)} did not match length of object: {len(self.dates)}")
@ -401,30 +385,6 @@ class TimeSeriesCore:
raise TypeError(f"Invalid type {repr(type(key).__name__)} for slicing.") raise TypeError(f"Invalid type {repr(type(key).__name__)} for slicing.")
def __gt__(self, other):
if isinstance(other, Number):
data = {k: v > other for k, v in self.data.items()}
if isinstance(other, TimeSeriesCore):
if self.dates != other.dates:
raise ValueError(
"Only objects with same set of dates can be compared.\n"
"Hint: use TimeSeries.sync() method to sync dates of two TimeSeries objects."
)
data = {dt: val > other[dt][1] for dt, val in self.data.items()}
if isinstance(other, Series):
if Series.dtype != float:
raise TypeError("Only Series of type float can be used for comparison")
if len(self) != len(other):
raise ValueError("Length of series does not match length of object")
data = {dt: val > other[i] for i, (dt, val) in enumerate(self.data.items())}
return self.__class__(data, frequency=self.frequency.symbol)
@date_parser(1) @date_parser(1)
def __setitem__(self, key: str | datetime.datetime, value: Number) -> None: def __setitem__(self, key: str | datetime.datetime, value: Number) -> None:
if not isinstance(value, Number): if not isinstance(value, Number):
@ -436,6 +396,116 @@ class TimeSeriesCore:
self.data.update({key: value}) self.data.update({key: value})
self.data = dict(sorted(self.data.items())) self.data = dict(sorted(self.data.items()))
@date_parser(1)
def __delitem__(self, key):
    """Remove the entry stored under ``key`` from ``self.data``.

    NOTE(review): ``date_parser(1)`` presumably normalises the argument at
    position 1 (``key``) into a date object before the lookup -- confirm
    against the decorator's definition.

    Raises:
        KeyError: if ``key`` is not present in ``self.data``.
    """
    self.data.pop(key)
def _comparison_validator(self, other):
    """Check that ``other`` may be compared with this object.

    Accepted operands are a ``Number``, a ``Series`` or a
    ``TimeSeriesCore``. The method returns ``None``; it only raises when
    the operand is unsuitable.

    Raises:
        TypeError: if ``other`` is none of the accepted types, or is a
            ``Series`` whose ``dtype`` is not ``float``.
        ValueError: if ``other`` is a ``TimeSeriesCore`` whose dates differ
            from this object's, or a ``Series`` of a different length.
    """
    accepted_types = (Number, Series, TimeSeriesCore)
    if not isinstance(other, accepted_types):
        raise TypeError(
            f"Comparison cannot be performed between '{self.__class__.__name__}' and '{other.__class__.__name__}'"
        )

    # NOTE(review): ``self.dates != other.dates`` is expected to produce an
    # iterable of per-date flags -- confirm the type of ``dates``.
    if isinstance(other, TimeSeriesCore) and any(self.dates != other.dates):
        raise ValueError(
            "Only objects with same set of dates can be compared.\n"
            "Hint: use TimeSeries.sync() method to sync dates of two TimeSeries objects."
        )

    if isinstance(other, Series):
        if other.dtype != float:
            raise TypeError("Only Series of type float can be used for comparison")
        if len(other) != len(self):
            raise ValueError("Length of series does not match length of object")
def __gt__(self, other):
    """Compute ``self > other`` for every date.

    ``other`` may be a ``Number`` (compared with every value), a ``Series``
    (matched by position) or a ``TimeSeriesCore`` (matched by date). The
    operand is checked by ``_comparison_validator`` first; its exceptions
    propagate unchanged.

    Returns:
        A new object of the same class mapping each date to the comparison
        result, built with this object's frequency symbol.
    """
    self._comparison_validator(other)

    # Only one branch applies once validation has passed.
    if isinstance(other, Series):
        result = {date: value > other[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(other, TimeSeriesCore):
        # Element 1 of ``other[date]`` is used as the value to compare with.
        result = {date: value > other[date][1] for date, value in self.data.items()}
    elif isinstance(other, Number):
        result = {date: value > other for date, value in self.data.items()}

    return self.__class__(result, frequency=self.frequency.symbol)
def __ge__(self, other):
    """Compute ``self >= other`` for every date.

    ``other`` may be a ``Number`` (compared with every value), a ``Series``
    (matched by position) or a ``TimeSeriesCore`` (matched by date). The
    operand is checked by ``_comparison_validator`` first; its exceptions
    propagate unchanged.

    Returns:
        A new object of the same class mapping each date to the comparison
        result, built with this object's frequency symbol.
    """
    self._comparison_validator(other)

    # Only one branch applies once validation has passed.
    if isinstance(other, Series):
        result = {date: value >= other[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(other, TimeSeriesCore):
        # Element 1 of ``other[date]`` is used as the value to compare with.
        result = {date: value >= other[date][1] for date, value in self.data.items()}
    elif isinstance(other, Number):
        result = {date: value >= other for date, value in self.data.items()}

    return self.__class__(result, frequency=self.frequency.symbol)
def __lt__(self, other):
    """Compute ``self < other`` for every date.

    ``other`` may be a ``Number`` (compared with every value), a ``Series``
    (matched by position) or a ``TimeSeriesCore`` (matched by date). The
    operand is checked by ``_comparison_validator`` first; its exceptions
    propagate unchanged.

    Returns:
        A new object of the same class mapping each date to the comparison
        result, built with this object's frequency symbol.
    """
    self._comparison_validator(other)

    # Only one branch applies once validation has passed.
    if isinstance(other, Series):
        result = {date: value < other[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(other, TimeSeriesCore):
        # Element 1 of ``other[date]`` is used as the value to compare with.
        result = {date: value < other[date][1] for date, value in self.data.items()}
    elif isinstance(other, Number):
        result = {date: value < other for date, value in self.data.items()}

    return self.__class__(result, frequency=self.frequency.symbol)
def __le__(self, other):
    """Compute ``self <= other`` for every date.

    ``other`` may be a ``Number`` (compared with every value), a ``Series``
    (matched by position) or a ``TimeSeriesCore`` (matched by date). The
    operand is checked by ``_comparison_validator`` first; its exceptions
    propagate unchanged.

    Returns:
        A new object of the same class mapping each date to the comparison
        result, built with this object's frequency symbol.
    """
    self._comparison_validator(other)

    # Only one branch applies once validation has passed.
    if isinstance(other, Series):
        result = {date: value <= other[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(other, TimeSeriesCore):
        # Element 1 of ``other[date]`` is used as the value to compare with.
        result = {date: value <= other[date][1] for date, value in self.data.items()}
    elif isinstance(other, Number):
        result = {date: value <= other for date, value in self.data.items()}

    return self.__class__(result, frequency=self.frequency.symbol)
def __eq__(self, other):
    """Compute ``self == other`` for every date.

    This is an element-wise comparison: the result is a new object of the
    same class, not a single ``bool``. ``other`` may be a ``Number``
    (compared with every value), a ``Series`` (matched by position) or a
    ``TimeSeriesCore`` (matched by date). Unsupported operands raise from
    ``_comparison_validator`` rather than yielding ``NotImplemented``.

    NOTE(review): a class that defines ``__eq__`` without also defining
    ``__hash__`` is unhashable -- check whether that is intended here.

    Returns:
        A new object of the same class mapping each date to the comparison
        result, built with this object's frequency symbol.
    """
    self._comparison_validator(other)

    # Only one branch applies once validation has passed.
    if isinstance(other, Series):
        result = {date: value == other[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(other, TimeSeriesCore):
        # Element 1 of ``other[date]`` is used as the value to compare with.
        result = {date: value == other[date][1] for date, value in self.data.items()}
    elif isinstance(other, Number):
        result = {date: value == other for date, value in self.data.items()}

    return self.__class__(result, frequency=self.frequency.symbol)
def __ne__(self, other):
    """Compute ``self != other`` for every date.

    This is an element-wise comparison: the result is a new object of the
    same class, not a single ``bool``. ``other`` may be a ``Number``
    (compared with every value), a ``Series`` (matched by position) or a
    ``TimeSeriesCore`` (matched by date). The operand is checked by
    ``_comparison_validator`` first; its exceptions propagate unchanged.

    Returns:
        A new object of the same class mapping each date to the comparison
        result, built with this object's frequency symbol.
    """
    self._comparison_validator(other)

    # Only one branch applies once validation has passed.
    if isinstance(other, Series):
        result = {date: value != other[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(other, TimeSeriesCore):
        # Element 1 of ``other[date]`` is used as the value to compare with.
        result = {date: value != other[date][1] for date, value in self.data.items()}
    elif isinstance(other, Number):
        result = {date: value != other for date, value in self.data.items()}

    return self.__class__(result, frequency=self.frequency.symbol)
def __iter__(self): def __iter__(self):
self.n = 0 self.n = 0
return self return self
@ -455,6 +525,121 @@ class TimeSeriesCore:
def __contains__(self, key: object) -> bool: def __contains__(self, key: object) -> bool:
return key in self.data return key in self.data
def _arithmatic_validator(self, other):
    """Validate ``other`` before a mathematical operation is performed.

    Accepted operands are a ``Number``, a ``Series`` or a
    ``TimeSeriesCore``. The method returns ``None``; it only raises when
    the operand is unsuitable.

    Raises:
        TypeError: if ``other`` is none of the accepted types, or is a
            ``Series`` whose ``dtype`` is not ``float``.
        ValueError: if ``other`` is a ``TimeSeriesCore`` or ``Series`` of a
            different length, or a ``TimeSeriesCore`` with different dates.
    """
    if not isinstance(other, (Number, Series, TimeSeriesCore)):
        raise TypeError(
            "Cannot perform mathematical operations between "
            f"'{self.__class__.__name__}' and '{other.__class__.__name__}'"
        )

    if isinstance(other, TimeSeriesCore):
        if len(other) != len(self):
            raise ValueError("Can only perform mathematical operations between objects of same length.")
        # NOTE(review): ``self.dates != other.dates`` is expected to produce
        # an iterable of per-date flags -- confirm the type of ``dates``.
        if any(self.dates != other.dates):
            raise ValueError("Can only perform mathematical operations between objects having same dates.")

    if isinstance(other, Series):
        if other.dtype != float:
            # ``dtype`` is a class; use its ``__name__`` so the message reads
            # "of type 'str'" instead of "of type '<class 'str'>'".
            raise TypeError(
                "Cannot perform mathematical operations with "
                f"'{other.__class__.__name__}' of type '{other.dtype.__name__}'"
            )
        if len(other) != len(self):
            raise ValueError("Can only perform mathematical operations between objects of same length.")
def __add__(self, other):
    """Compute ``self + other`` for every date.

    ``other`` may be a ``Number`` (applied to every value), a ``Series``
    (matched by position) or a ``TimeSeriesCore`` (its ``values`` are taken
    and matched by position). The operand is checked by
    ``_arithmatic_validator`` first; its exceptions propagate unchanged.

    Returns:
        A new object of the same class mapping each date to the result,
        built with this object's frequency symbol.
    """
    self._arithmatic_validator(other)
    operand = other.values if isinstance(other, TimeSeriesCore) else other

    if isinstance(operand, Series):
        result = {date: value + operand[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(operand, Number):
        result = {date: value + operand for date, value in self.data.items()}

    return self.__class__(result, self.frequency.symbol)
def __sub__(self, other):
    """Compute ``self - other`` for every date.

    ``other`` may be a ``Number`` (applied to every value), a ``Series``
    (matched by position) or a ``TimeSeriesCore`` (its ``values`` are taken
    and matched by position). The operand is checked by
    ``_arithmatic_validator`` first; its exceptions propagate unchanged.

    Returns:
        A new object of the same class mapping each date to the result,
        built with this object's frequency symbol.
    """
    self._arithmatic_validator(other)
    operand = other.values if isinstance(other, TimeSeriesCore) else other

    if isinstance(operand, Series):
        result = {date: value - operand[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(operand, Number):
        result = {date: value - operand for date, value in self.data.items()}

    return self.__class__(result, self.frequency.symbol)
def __truediv__(self, other):
    """Compute ``self / other`` for every date.

    ``other`` may be a ``Number`` (applied to every value), a ``Series``
    (matched by position) or a ``TimeSeriesCore`` (its ``values`` are taken
    and matched by position). The operand is checked by
    ``_arithmatic_validator`` first; its exceptions propagate unchanged.

    Returns:
        A new object of the same class mapping each date to the result,
        built with this object's frequency symbol.
    """
    self._arithmatic_validator(other)
    operand = other.values if isinstance(other, TimeSeriesCore) else other

    if isinstance(operand, Series):
        result = {date: value / operand[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(operand, Number):
        result = {date: value / operand for date, value in self.data.items()}

    return self.__class__(result, self.frequency.symbol)
def __floordiv__(self, other):
    """Compute ``self // other`` for every date.

    ``other`` may be a ``Number`` (applied to every value), a ``Series``
    (matched by position) or a ``TimeSeriesCore`` (its ``values`` are taken
    and matched by position). The operand is checked by
    ``_arithmatic_validator`` first; its exceptions propagate unchanged.

    Returns:
        A new object of the same class mapping each date to the result,
        built with this object's frequency symbol.
    """
    self._arithmatic_validator(other)
    operand = other.values if isinstance(other, TimeSeriesCore) else other

    if isinstance(operand, Series):
        result = {date: value // operand[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(operand, Number):
        result = {date: value // operand for date, value in self.data.items()}

    return self.__class__(result, self.frequency.symbol)
def __mul__(self, other):
    """Compute ``self * other`` for every date.

    ``other`` may be a ``Number`` (applied to every value), a ``Series``
    (matched by position) or a ``TimeSeriesCore`` (its ``values`` are taken
    and matched by position). The operand is checked by
    ``_arithmatic_validator`` first; its exceptions propagate unchanged.

    Returns:
        A new object of the same class mapping each date to the result,
        built with this object's frequency symbol.
    """
    self._arithmatic_validator(other)
    operand = other.values if isinstance(other, TimeSeriesCore) else other

    if isinstance(operand, Series):
        result = {date: value * operand[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(operand, Number):
        result = {date: value * operand for date, value in self.data.items()}

    return self.__class__(result, self.frequency.symbol)
def __mod__(self, other):
    """Compute ``self % other`` for every date.

    ``other`` may be a ``Number`` (applied to every value), a ``Series``
    (matched by position) or a ``TimeSeriesCore`` (its ``values`` are taken
    and matched by position). The operand is checked by
    ``_arithmatic_validator`` first; its exceptions propagate unchanged.

    Returns:
        A new object of the same class mapping each date to the result,
        built with this object's frequency symbol.
    """
    self._arithmatic_validator(other)
    operand = other.values if isinstance(other, TimeSeriesCore) else other

    if isinstance(operand, Series):
        result = {date: value % operand[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(operand, Number):
        result = {date: value % operand for date, value in self.data.items()}

    return self.__class__(result, self.frequency.symbol)
def __pow__(self, other):
    """Compute ``self ** other`` for every date.

    ``other`` may be a ``Number`` (applied to every value), a ``Series``
    (matched by position) or a ``TimeSeriesCore`` (its ``values`` are taken
    and matched by position). The operand is checked by
    ``_arithmatic_validator`` first; its exceptions propagate unchanged.

    Returns:
        A new object of the same class mapping each date to the result,
        built with this object's frequency symbol.
    """
    self._arithmatic_validator(other)
    operand = other.values if isinstance(other, TimeSeriesCore) else other

    if isinstance(operand, Series):
        result = {date: value ** operand[pos] for pos, (date, value) in enumerate(self.data.items())}
    elif isinstance(operand, Number):
        result = {date: value ** operand for date, value in self.data.items()}

    return self.__class__(result, self.frequency.symbol)
@date_parser(1) @date_parser(1)
def get(self, date: str | datetime.datetime, default=None, closest=None): def get(self, date: str | datetime.datetime, default=None, closest=None):

View File

@ -244,3 +244,93 @@ class TestTimeSeriesCoreHeadTail:
assert isinstance(head_tail_ts, TimeSeriesCore) assert isinstance(head_tail_ts, TimeSeriesCore)
assert "2021-07-01" in head_tail_ts assert "2021-07-01" in head_tail_ts
assert head_tail_ts.iloc[1][1] == 290 assert head_tail_ts.iloc[1][1] == 290
class TestDelitem:
    """Deleting an entry from a TimeSeriesCore by its date key."""

    data = [
        ("2021-01-01", 220),
        ("2021-02-01", 230),
        ("2021-03-01", 240),
        ("2021-04-01", 250),
    ]

    def test_deletion(self):
        """A deleted date disappears; deleting it a second time raises KeyError."""
        series = TimeSeriesCore(self.data, "M")
        assert len(series) == 4

        del series["2021-03-01"]
        assert len(series) == 3
        assert "2021-03-01" not in series

        # The key is already gone, so a repeated delete must fail.
        with pytest.raises(KeyError):
            del series["2021-03-01"]
class TestTimeSeriesComparisons:
    """Comparison operators of TimeSeriesCore against numbers, Series and TimeSeriesCore.

    ``Series == Series`` returns a Series of per-element results rather than
    a single bool, and a non-empty sequence is truthy whatever it contains.
    Each value check is therefore wrapped in ``all(...)`` so that a wrong
    element actually fails the test.
    NOTE(review): this assumes Series does not override ``__bool__`` -- confirm.
    """

    data1 = [
        ("2021-01-01", 220),
        ("2021-02-01", 230),
        ("2021-03-01", 240),
        ("2021-04-01", 250),
    ]

    data2 = [
        ("2021-01-01", 240),
        ("2021-02-01", 210),
        ("2021-03-01", 240),
        ("2021-04-01", 270),
    ]

    def test_number_comparison(self):
        """Comparing with a plain number yields per-date results."""
        ts1 = TimeSeriesCore(self.data1, "M")
        assert isinstance(ts1 > 23, TimeSeriesCore)
        assert all((ts1 > 230).values == Series([0.0, 0.0, 1.0, 1.0], "float"))
        assert all((ts1 >= 230).values == Series([0.0, 1.0, 1.0, 1.0], "float"))
        assert all((ts1 < 240).values == Series([1.0, 1.0, 0.0, 0.0], "float"))
        assert all((ts1 <= 240).values == Series([1.0, 1.0, 1.0, 0.0], "float"))
        assert all((ts1 == 240).values == Series([0.0, 0.0, 1.0, 0.0], "float"))
        assert all((ts1 != 240).values == Series([1.0, 1.0, 0.0, 1.0], "float"))

    def test_series_comparison(self):
        """Comparing with a Series matches elements by position."""
        ts1 = TimeSeriesCore(self.data1, "M")
        ser = Series([240, 210, 240, 270], data_type="int")

        assert all((ts1 > ser).values == Series([0.0, 1.0, 0.0, 0.0], "float"))
        assert all((ts1 >= ser).values == Series([0.0, 1.0, 1.0, 0.0], "float"))
        assert all((ts1 < ser).values == Series([1.0, 0.0, 0.0, 1.0], "float"))
        assert all((ts1 <= ser).values == Series([1.0, 0.0, 1.0, 1.0], "float"))
        assert all((ts1 == ser).values == Series([0.0, 0.0, 1.0, 0.0], "float"))
        assert all((ts1 != ser).values == Series([1.0, 1.0, 0.0, 1.0], "float"))

    def test_tsc_comparison(self):
        """Comparing two TimeSeriesCore objects with identical dates."""
        ts1 = TimeSeriesCore(self.data1, "M")
        ts2 = TimeSeriesCore(self.data2, "M")

        assert all((ts1 > ts2).values == Series([0.0, 1.0, 0.0, 0.0], "float"))
        assert all((ts1 >= ts2).values == Series([0.0, 1.0, 1.0, 0.0], "float"))
        assert all((ts1 < ts2).values == Series([1.0, 0.0, 0.0, 1.0], "float"))
        assert all((ts1 <= ts2).values == Series([1.0, 0.0, 1.0, 1.0], "float"))
        assert all((ts1 == ts2).values == Series([0.0, 0.0, 1.0, 0.0], "float"))
        assert all((ts1 != ts2).values == Series([1.0, 1.0, 0.0, 1.0], "float"))

    def test_errors(self):
        """Unsupported operands and mismatched shapes raise."""
        ts1 = TimeSeriesCore(self.data1, "M")
        ts2 = TimeSeriesCore(self.data2, "M")
        ser = Series([240, 210, 240], data_type="int")
        ser2 = Series(["2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01"], data_type="date")

        # Drop one date so ts2 no longer lines up with ts1.
        del ts2["2021-04-01"]

        with pytest.raises(TypeError):
            ts1 == "a"

        with pytest.raises(ValueError):
            ts1 > ts2

        with pytest.raises(TypeError):
            ts1 == ser2

        with pytest.raises(ValueError):
            ts1 <= ser

        with pytest.raises(TypeError):
            ts2 < [23, 24, 25, 26]