From 3a5ca9123488bfc86ce8e167a5a5f199e4bc632a Mon Sep 17 00:00:00 2001 From: Gourav Kumar Date: Sun, 1 Jan 2023 18:01:57 +0530 Subject: [PATCH] documentation of read_csv --- pyfacts/pyfacts.py | 52 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/pyfacts/pyfacts.py b/pyfacts/pyfacts.py index 51e955c..d823c9a 100644 --- a/pyfacts/pyfacts.py +++ b/pyfacts/pyfacts.py @@ -568,6 +568,7 @@ class TimeSeries(TimeSeriesCore): Parameters ---------- kwargs: parameters to be passed to the calculate_rolling_returns() function + Refer to the TimeSeries.calculate_rolling_returns() method for more details Returns ------- @@ -866,7 +867,9 @@ class TimeSeries(TimeSeriesCore): return self.__class__(new_ts_dict, to_frequency.symbol) -def _preprocess_csv(file_path: str | pathlib.Path, delimiter: str = ",", encoding: str = "utf-8") -> List[list]: +def _preprocess_csv( + file_path: str | pathlib.Path, delimiter: str = ",", encoding: str = "utf-8", **kwargs +) -> List[list]: """Preprocess csv data""" if isinstance(file_path, str): @@ -876,7 +879,7 @@ def _preprocess_csv(file_path: str | pathlib.Path, delimiter: str = ",", encodin raise ValueError("File not found. Check the file path") with open(file_path, "r", encoding=encoding) as file: - reader: csv.reader = csv.reader(file, delimiter=delimiter) + reader: csv.reader = csv.reader(file, delimiter=delimiter, **kwargs) csv_data: list = list(reader) csv_data = [i for i in csv_data if i] # remove blank rows @@ -897,8 +900,51 @@ def read_csv( nrows: int = -1, delimiter: str = ",", encoding: str = "utf-8", + **kwargs, ) -> TimeSeries: - """Reads Time Series data directly from a CSV file""" + """Reads Time Series data directly from a CSV file + + Parameters + ---------- + csv_file_path: + path of the csv file to be read. + + frequency: + frequency of the time series data. 
+ + date_format: + date format, specified as datetime compatible string + + col_names: + specify the column headers to be read. + this parameter will allow you to read two columns from a CSV file which may have more columns. + this parameter overrides the col_index parameter. + + col_index: + specify the column numbers to be read. + this parameter will allow you to read two columns from a CSV file which may have more columns. + if neither names nor index is specified, the first two columns from the csv file will be read, + with the first being treated as date. + + has_header: + specify whether the file has a header row. + if true, the header row will be ignored while creating the time series data. + + skip_rows: + the number of rows after the header which should be skipped. + + nrows: + the number of rows to be read from the csv file. + + delimiter: + specify the delimiter used in the csv file. + + encoding: + specify the encoding of the csv file. + + kwargs: + other keyword arguments to be passed on to csv.reader() + """ data = _preprocess_csv(csv_file_path, delimiter, encoding)