def _preprocess_csv(file_path: str | pathlib.Path, delimiter: str = ",", encoding: str = "utf-8") -> List[list]:
    """Load a CSV file into a list of rows, discarding blank rows.

    Parameters
    ----------
    file_path : str | pathlib.Path
        Location of the CSV file.
    delimiter : str
        Field separator handed to ``csv.reader``.
    encoding : str
        Text encoding used to open the file.

    Returns
    -------
    List[list]
        One list of string fields per non-blank row, in file order.

    Raises
    ------
    ValueError
        If the path does not exist, or if the file holds no non-blank rows.
    """

    file_path = pathlib.Path(file_path)

    if not file_path.exists():
        raise ValueError("File not found. Check the file path")

    # newline="" leaves newline handling to the csv module, so quoted fields
    # that contain line breaks are parsed as a single field.
    with open(file_path, "r", encoding=encoding, newline="") as file:
        # csv.reader yields an empty list for a blank line; drop those rows.
        csv_data = [row for row in csv.reader(file, delimiter=delimiter) if row]

    if not csv_data:
        raise ValueError("File is empty")

    return csv_data


def read_csv(
    csv_file_path: str | pathlib.Path,
    frequency: Literal["D", "W", "M", "Q", "Y"],
    date_format: str = None,
    col_names: Tuple[str, str] = None,
    col_index: Tuple[int, int] = (0, 1),
    has_header: bool = True,
    skip_rows: int = 0,
    nrows: int = -1,
    delimiter: str = ",",
    encoding: str = "utf-8",
) -> TimeSeriesCore:
    """Read time series data directly from a CSV file.

    Blank rows are removed before any row counting takes place, so
    ``skip_rows`` and ``nrows`` count non-blank rows only.

    Parameters
    ----------
    csv_file_path : str | pathlib.Path
        Location of the CSV file.
    frequency : {"D", "W", "M", "Q", "Y"}
        Passed through unchanged to ``TimeSeries``.
    date_format : str, optional
        Passed through unchanged to ``TimeSeries``.
    col_names : Tuple[str, str], optional
        Header names of the date and value columns, in that order. Requires
        ``has_header=True`` and takes precedence over ``col_index``.
    col_index : Tuple[int, int]
        Positions of the date and value columns, used when ``col_names`` is
        not given.
    has_header : bool
        Whether the first row read (after ``skip_rows``) is a header row.
    skip_rows : int
        Number of leading non-blank rows to ignore.
    nrows : int
        Number of non-blank rows to consume starting at ``skip_rows``; a
        header row, when present, counts towards this number. A negative
        value reads to the end of the file.
    delimiter : str
        Field separator of the CSV file.
    encoding : str
        Text encoding used to open the file.

    Returns
    -------
    TimeSeries
        Built from the ``(date, value)`` string pairs taken from the
        selected rows.

    Raises
    ------
    ValueError
        If the file is missing or empty, if ``col_names`` is given while
        ``has_header`` is False, or if a requested column name is absent
        from the header.
    IndexError
        If ``skip_rows`` points past the last row while a header is
        expected, or a row has fewer fields than the selected columns need.
    """

    # Column names can only be resolved against a header row; fail early with
    # a clear message instead of tripping over an undefined header later.
    if col_names is not None and not has_header:
        raise ValueError("col_names can only be used when has_header is True")

    data = _preprocess_csv(csv_file_path, delimiter, encoding)

    read_start_row = skip_rows
    read_end_row = skip_rows + nrows if nrows >= 0 else None

    header = None
    if has_header:
        header = data[read_start_row]
        read_start_row += 1  # data rows begin right after the header

    # Apply the row window whether or not a header is present, so that
    # skip_rows / nrows are honoured for header-less files as well.
    data = data[read_start_row:read_end_row]

    if col_names is not None:
        missing = [name for name in col_names[:2] if name not in header]
        if missing:
            raise ValueError(f"Column(s) {missing} not found in header {header}")
        date_col = header.index(col_names[0])
        value_col = header.index(col_names[1])
    else:
        date_col = col_index[0]
        value_col = col_index[1]

    ts_data = [(row[date_col], row[value_col]) for row in data]

    return TimeSeries(ts_data, frequency=frequency, date_format=date_format)