Browse Source

deleted files not required

find_closest_changes
Gourav Kumar 2 years ago
parent
commit
c7e955f91e
  1. 15
      .ipynb_checkpoints/README-checkpoint.md
  2. 233
      .ipynb_checkpoints/testing-checkpoint.ipynb

15
.ipynb_checkpoints/README-checkpoint.md

@ -1,15 +0,0 @@
# Fincal
This module simplifies the handling of time-series data.
## The problem
Time series data often have missing data points. These missing points mess things up when you are trying to do a comparison between two sections of a time series.
To make things worse, most libraries don't allow comparison based on dates. Month-to-month and year-to-year comparisons become difficult because months and years cannot be translated into a fixed number of days. However, these are commonly used metrics when looking at financial data.
## The Solution
Fincal aims to simplify things by allowing you to:
* Compare time-series data based on dates
* Work around missing dates by taking the closest data points
* Complete series with missing data points using forward fill and backward fill
## Examples

233
.ipynb_checkpoints/testing-checkpoint.ipynb

@ -1,233 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3f7938c0-98e3-43b8-86e8-4f000cda7ce5",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"import pandas as pd\n",
"\n",
"from fincal.fincal import TimeSeries\n",
"from fincal.core import Series"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4b8ccd5f-dfff-4202-82c4-f66a30c122b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 152 ms, sys: 284 ms, total: 436 ms\n",
"Wall time: 61.3 ms\n"
]
},
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2021, 5, 28, 0, 0), 249.679993),\n",
"\t(datetime.datetime(2022, 1, 31, 0, 0), 310.980011)], frequency='D')"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"dfd = pd.read_csv('test_files/msft.csv')\n",
"# dfd = dfd[dfd['amfi_code'] == 118825].reset_index(drop=True)\n",
"ts = TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency='D')\n",
"repr(ts)\n",
"ts[['2022-01-31', '2021-05-28']]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a0232e05-27c7-4d2d-a4bc-5dcf42666983",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "Type List cannot be instantiated; use list() instead",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [3]\u001b[0m, in \u001b[0;36m<cell line: 7>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfincal\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Frequency\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m List, Tuple\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate_test_data\u001b[39m(\n\u001b[1;32m 6\u001b[0m frequency: Frequency,\n\u001b[1;32m 7\u001b[0m num: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1000\u001b[39m,\n\u001b[1;32m 8\u001b[0m skip_weekends: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 9\u001b[0m mu: \u001b[38;5;28mfloat\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0.1\u001b[39m,\n\u001b[1;32m 10\u001b[0m sigma: \u001b[38;5;28mfloat\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0.05\u001b[39m,\n\u001b[1;32m 11\u001b[0m eomonth: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m---> 12\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[43mList\u001b[49m\u001b[43m(\u001b[49m\u001b[43mTuple\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 13\u001b[0m \u001b[38;5;124;03m\"\"\"Creates TimeSeries data\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \n\u001b[1;32m 15\u001b[0m \u001b[38;5;124;03m Parameters:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;124;03m Returns a TimeSeries object\u001b[39;00m\n\u001b[1;32m 36\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 38\u001b[0m start_date \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mdatetime(\u001b[38;5;241m2017\u001b[39m, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m1\u001b[39m)\n",
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/typing.py:941\u001b[0m, in \u001b[0;36m_BaseGenericAlias.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 939\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 940\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inst:\n\u001b[0;32m--> 941\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mType \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m cannot be instantiated; \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 942\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muse \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__origin__\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m() instead\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 943\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__origin__(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 944\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
"\u001b[0;31mTypeError\u001b[0m: Type List cannot be instantiated; use list() instead"
]
}
],
"source": [
"from fincal.fincal import create_date_series\n",
"from fincal.core import Frequency\n",
"from typing import List, Tuple\n",
"\n",
"def create_test_data(\n",
" frequency: Frequency,\n",
" num: int = 1000,\n",
" skip_weekends: bool = False,\n",
" mu: float = 0.1,\n",
" sigma: float = 0.05,\n",
" eomonth: bool = False,\n",
") -> List[Tuple]:\n",
" \"\"\"Creates TimeSeries data\n",
"\n",
" Parameters:\n",
" -----------\n",
" frequency: Frequency\n",
" The frequency of the time series data to be generated.\n",
"\n",
" num: int\n",
" Number of date: value pairs to be generated.\n",
"\n",
" skip_weekends: bool\n",
" Whether weekends (saturday, sunday) should be skipped.\n",
" Gets used only if the frequency is daily.\n",
"\n",
" mu: float\n",
" Mean return for the values.\n",
"\n",
" sigma: float\n",
" standard deviation of the values.\n",
"\n",
" Returns:\n",
" --------\n",
" Returns a TimeSeries object\n",
" \"\"\"\n",
"\n",
" start_date = datetime.datetime(2017, 1, 1)\n",
" timedelta_dict = {\n",
" frequency.freq_type: int(\n",
" frequency.value * num * (7 / 5 if frequency == AllFrequencies.D and skip_weekends else 1)\n",
" )\n",
" }\n",
" end_date = start_date + relativedelta(**timedelta_dict)\n",
" dates = create_date_series(start_date, end_date, frequency.symbol, skip_weekends=skip_weekends, eomonth=eomonth)\n",
" values = create_prices(1000, mu, sigma, num)\n",
" ts = list(zip(dates, values))\n",
" return ts"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53dbc8a6-d7b1-4d82-ac3d-ee3908ff086d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 1,
"id": "aa1584d5-1df0-4661-aeeb-5e8c424de06d",
"metadata": {},
"outputs": [],
"source": [
"from fincal import fincal\n",
"from fincal.core import FincalOptions\n",
"import csv"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7d51fca1-f731-47c8-99c9-6e199cfeca92",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['date', 'nav']\n",
"CPU times: user 47.7 ms, sys: 3.16 ms, total: 50.9 ms\n",
"Wall time: 50.3 ms\n"
]
},
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(1992, 2, 19, 0, 0), '2.398438'),\n",
"\t (datetime.datetime(1992, 2, 20, 0, 0), '2.447917'),\n",
"\t (datetime.datetime(1992, 2, 21, 0, 0), '2.385417')\n",
"\t ...\n",
"\t (datetime.datetime(2022, 2, 16, 0, 0), '299.5'),\n",
"\t (datetime.datetime(2022, 2, 17, 0, 0), '290.730011'),\n",
"\t (datetime.datetime(2022, 2, 18, 0, 0), '287.929993')], frequency='M')"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"FincalOptions.date_format = '%Y-%m-%d'\n",
"fincal.read_csv('test_files/msft.csv', frequency='M')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "b689f64c-6764-45b5-bccf-f23b351f6419",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "6c9b2dd7-9983-40cd-8ac4-3530a3892f17",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 61.4 ms, sys: 2.35 ms, total: 63.7 ms\n",
"Wall time: 62.6 ms\n"
]
}
],
"source": [
"%%time\n",
"dfd = pd.read_csv(\"test_files/msft.csv\")\n",
"ts = fincal.TimeSeries([(i.date, i.nav) for i in dfd.itertuples()], frequency=\"D\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading…
Cancel
Save