PyFacts/testing.ipynb
2022-05-24 21:11:15 +05:30

717 lines
21 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "e40a5526-458a-4d11-8eaa-3b584f723738",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import fincal as fc\n",
"import datetime\n",
"from dateutil.relativedelta import relativedelta"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a54bfbdf",
"metadata": {},
"outputs": [],
"source": [
"data = [\n",
" (\"2022-01-01\", 10),\n",
" (\"2022-01-02\", 12),\n",
" (\"2022-01-03\", 14),\n",
" (\"2022-01-04\", 16)\n",
" # (\"2022-01-06\", 18),\n",
" # (\"2022-01-07\", 20),\n",
" # (\"2022-01-09\", 22),\n",
" # (\"2022-01-10\", 24),\n",
" # (\"2022-01-11\", 26),\n",
" # (\"2022-01-12\", 28),\n",
" # (\"2023-01-01\", 30),\n",
" # (\"2023-01-02\", 32),\n",
" # (\"2023-01-03\", 34),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "fcc5f8f1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 10.0),\n",
"\t(datetime.datetime(2022, 1, 2, 0, 0), 12.0),\n",
"\t(datetime.datetime(2022, 1, 3, 0, 0), 14.0),\n",
"\t(datetime.datetime(2022, 1, 4, 0, 0), 16.0)], frequency='D')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts = fc.TimeSeries(data, 'D')\n",
"ts2 = fc.TimeSeries(data, 'D')\n",
"ts"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "c091da16-d3a2-4d5b-93da-099d67373932",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Series([datetime.datetime(2021, 1, 1, 0, 0), datetime.datetime(2021, 1, 2, 0, 0)], data_type='datetime')"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fc.Series(['2021-01-01', '2021-01-02'], data_type='date')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "77fc30d8-2843-40c4-9842-d943e6ef9813",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Series([11.0, 14.0, 17.0, 20.0], data_type='float')"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts.values + fc.Series([1, 2, 3, 4])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "8e812756",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "TimeSeries can be only expanded to a higher frequency",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [8]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mts\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexpand\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mW\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mffill\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Documents/projects/fincal/fincal/fincal.py:624\u001b[0m, in \u001b[0;36mTimeSeries.expand\u001b[0;34m(self, to_frequency, method, skip_weekends, eomonth)\u001b[0m\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid argument for to_frequency \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mto_frequency\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 623\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m to_frequency\u001b[38;5;241m.\u001b[39mdays \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfrequency\u001b[38;5;241m.\u001b[39mdays:\n\u001b[0;32m--> 624\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTimeSeries can be only expanded to a higher frequency\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 626\u001b[0m new_dates \u001b[38;5;241m=\u001b[39m create_date_series(\n\u001b[1;32m 627\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstart_date,\n\u001b[1;32m 628\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mend_date,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 632\u001b[0m ensure_coverage\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 633\u001b[0m )\n\u001b[1;32m 635\u001b[0m closest: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprevious\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m method \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mffill\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnext\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
"\u001b[0;31mValueError\u001b[0m: TimeSeries can be only expanded to a higher frequency"
]
}
],
"source": [
"ts.expand('W', 'ffill')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "55918da9-2df6-4773-9ca0-e19b52c3ece2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2022, 1, 1, 0, 0), 10),\n",
"\t(datetime.datetime(2022, 4, 1, 0, 0), 28),\n",
"\t(datetime.datetime(2022, 7, 1, 0, 0), 28),\n",
"\t(datetime.datetime(2022, 10, 1, 0, 0), 28),\n",
"\t(datetime.datetime(2023, 1, 1, 0, 0), 30)], frequency='Q')"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts.shrink('Q', 'ffill')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9431eb8c",
"metadata": {},
"outputs": [],
"source": [
"from fincal.utils import _is_eomonth"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "36eefec7-7dbf-4a28-ac50-2e502d9d6864",
"metadata": {},
"outputs": [],
"source": [
"weekly_data = [\n",
" ('2018-01-31', 26),\n",
" ('2018-02-28', 44),\n",
" ('2018-03-30', 40),\n",
" ('2018-04-30', 36),\n",
" ('2018-05-31', 31),\n",
" ('2018-06-30', 45),\n",
" ('2018-07-30', 31),\n",
" ('2018-08-31', 42),\n",
" ('2018-09-30', 40),\n",
" ('2018-10-30', 30),\n",
" ('2018-11-30', 35),\n",
" ('2018-12-31', 37),\n",
" ('2019-01-31', 31),\n",
" ('2019-02-28', 44),\n",
" ('2019-03-31', 31),\n",
" ('2019-04-29', 32),\n",
" ('2019-05-30', 39),\n",
" ('2019-06-30', 27),\n",
" ('2019-07-31', 35),\n",
" ('2019-08-31', 33),\n",
" ('2019-09-30', 29),\n",
" ('2019-10-30', 26),\n",
" ('2019-11-30', 39),\n",
" ('2019-12-30', 30),\n",
" ('2020-01-30', 29)\n",
"]\n",
"week_ts = fc.TimeSeries(weekly_data, 'W')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e1071f90",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"_is_eomonth(week_ts.dates)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "d64dd3c6-4295-4301-90e4-5c74ea23c4af",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(datetime.datetime(2017, 1, 1, 0, 0), 67)\n",
"(datetime.datetime(2017, 2, 1, 0, 0), 85)\n",
"(datetime.datetime(2017, 3, 1, 0, 0), 76)\n",
"(datetime.datetime(2017, 4, 1, 0, 0), 78)\n",
"(datetime.datetime(2017, 5, 1, 0, 0), 65)\n",
"(datetime.datetime(2017, 6, 1, 0, 0), 74)\n"
]
}
],
"source": [
"for i in week_ts.shrink('M', 'ffill', skip_weekends=True):\n",
" print(i)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a549c5c0-c89a-4cc3-b396-c4afa77a9879",
"metadata": {},
"outputs": [],
"source": [
"week_ts.sync(ts)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "4755aea3-3655-4651-91d2-8e54c24303bc",
"metadata": {},
"outputs": [],
"source": [
"import fincal as fc"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "bd9887b3-d98a-4c80-8f95-ef7b7f19ded4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['date', 'nav']\n",
"CPU times: user 57.5 ms, sys: 3.38 ms, total: 60.8 ms\n",
"Wall time: 60.5 ms\n"
]
}
],
"source": [
"%%time\n",
"ts = fc.read_csv('test_files/msft.csv', frequency='D')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b7c176d4-d89f-4bda-9d67-75463eb90468",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(datetime.datetime(2022, 2, 11, 0, 0), 295.040009)\n",
"(datetime.datetime(2022, 2, 12, 0, 0), 296.0)\n",
"(datetime.datetime(2022, 2, 13, 0, 0), 296.0)\n",
"(datetime.datetime(2022, 2, 14, 0, 0), 295.0)\n",
"(datetime.datetime(2022, 2, 15, 0, 0), 300.470001)\n",
"(datetime.datetime(2022, 2, 16, 0, 0), 299.5)\n",
"(datetime.datetime(2022, 2, 17, 0, 0), 290.730011)\n",
"(datetime.datetime(2022, 2, 18, 0, 0), 287.929993)\n"
]
}
],
"source": [
"for i in ts.tail(8):\n",
" print(i)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "69c57754-a6fb-4881-9359-ba17c7fb8be5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.76 ms, sys: 123 µs, total: 1.88 ms\n",
"Wall time: 1.88 ms\n"
]
}
],
"source": [
"%%time\n",
"ts['2022-02-12'] = 296"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7aa02023-406e-4700-801c-c06390ddf914",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3.61 ms, sys: 68 µs, total: 3.68 ms\n",
"Wall time: 3.7 ms\n"
]
},
{
"data": {
"text/plain": [
"{'start_date': datetime.datetime(1999, 12, 27, 0, 0),\n",
" 'end_date': datetime.datetime(2009, 3, 9, 0, 0),\n",
" 'drawdown': -0.7456453305351521}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"ts.max_drawdown()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "72cb4da4-1318-4b9b-b563-adac46accfb3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from typing import Mapping\n",
"isinstance(ts, Mapping)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "96bbecbf",
"metadata": {},
"outputs": [],
"source": [
"import fincal as fc"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "19199c92",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['amfi_code', 'date', 'nav']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/gourav/Documents/projects/fincal/fincal/core.py:308: UserWarning: The input data contains duplicate dates which have been ignored.\n",
" warnings.warn(\"The input data contains duplicate dates which have been ignored.\")\n"
]
},
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2013, 1, 2, 0, 0), 18.972),\n",
"\t (datetime.datetime(2013, 1, 3, 0, 0), 19.011),\n",
"\t (datetime.datetime(2013, 1, 4, 0, 0), 19.008)\n",
"\t ...\n",
"\t (datetime.datetime(2022, 2, 10, 0, 0), 86.5),\n",
"\t (datetime.datetime(2022, 2, 11, 0, 0), 85.226),\n",
"\t (datetime.datetime(2022, 2, 14, 0, 0), 82.533)], frequency='D')"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts = fc.read_csv('test_files/nav_history_daily - copy.csv', col_index=(1, 2), frequency='D', date_format='%d-%m-%y')\n",
"ts"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "51c9ae9a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.12031455056454916"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fc.sharpe_ratio(\n",
" ts,\n",
" risk_free_rate=0.06,\n",
" from_date='2013-02-04',\n",
" to_date='2022-02-14',\n",
" return_period_unit='months',\n",
" return_period_value=1\n",
")"
]
},
{
"cell_type": "markdown",
"id": "b3fb7b59-eaa3-41a5-b1ab-89d63b69edb0",
"metadata": {},
"source": [
"# Data generator"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "aead3e77-2670-4541-846a-5537b01f3d2e",
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"import math\n",
"import fincal as fc\n",
"from typing import List\n",
"import datetime\n",
"from dateutil.relativedelta import relativedelta"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f287e05f",
"metadata": {},
"outputs": [],
"source": [
"def create_prices(s0: float, mu: float, sigma: float, num_prices: int) -> list:\n",
" \"\"\"Generates a price following a geometric brownian motion process based on the input of the arguments.\n",
"\n",
" Since this function is used only to generate data for tests, the seed is fixed as 1234.\n",
" Many of the tests rely on exact values generated using this seed.\n",
" If the seed is changed, those tests will fail.\n",
"\n",
" Parameters:\n",
" ------------\n",
" s0: float\n",
" Asset inital price.\n",
"\n",
" mu: float\n",
" Interest rate expressed annual terms.\n",
"\n",
" sigma: float\n",
" Volatility expressed annual terms.\n",
"\n",
" num_prices: int\n",
" number of prices to generate\n",
"\n",
" Returns:\n",
" --------\n",
" Returns a list of values generated using GBM algorithm\n",
" \"\"\"\n",
"\n",
" random.seed(1234) # WARNING! Changing the seed will cause most tests to fail\n",
" all_values = []\n",
" for _ in range(num_prices):\n",
" s0 *= math.exp(\n",
" (mu - 0.5 * sigma**2) * (1.0 / 365.0) + sigma * math.sqrt(1.0 / 365.0) * random.gauss(mu=0, sigma=1)\n",
" )\n",
" all_values.append(round(s0, 2))\n",
"\n",
" return all_values\n",
"\n",
"\n",
"def sample_data_generator(\n",
" frequency: fc.Frequency,\n",
" num: int = 1000,\n",
" skip_weekends: bool = False,\n",
" mu: float = 0.1,\n",
" sigma: float = 0.05,\n",
" eomonth: bool = False,\n",
") -> List[tuple]:\n",
" \"\"\"Creates TimeSeries data\n",
"\n",
" Parameters:\n",
" -----------\n",
" frequency: Frequency\n",
" The frequency of the time series data to be generated.\n",
"\n",
" num: int\n",
" Number of date: value pairs to be generated.\n",
"\n",
" skip_weekends: bool\n",
" Whether weekends (saturday, sunday) should be skipped.\n",
" Gets used only if the frequency is daily.\n",
"\n",
" mu: float\n",
" Mean return for the values.\n",
"\n",
" sigma: float\n",
" standard deviation of the values.\n",
"\n",
" Returns:\n",
" --------\n",
" Returns a TimeSeries object\n",
" \"\"\"\n",
"\n",
" start_date = datetime.datetime(2017, 1, 1)\n",
" timedelta_dict = {\n",
" frequency.freq_type: int(\n",
" frequency.value * num * (7 / 5 if frequency == fc.AllFrequencies.D and skip_weekends else 1)\n",
" )\n",
" }\n",
" end_date = start_date + relativedelta(**timedelta_dict)\n",
" dates = fc.create_date_series(start_date, end_date, frequency.symbol, skip_weekends=skip_weekends, eomonth=eomonth)\n",
" values = create_prices(1000, mu, sigma, num)\n",
" ts = list(zip(dates, values))\n",
" return ts\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "c85b5dd9-9a88-4608-ac58-1a141295f63f",
"metadata": {},
"outputs": [],
"source": [
"data = sample_data_generator(num=261, frequency=fc.AllFrequencies.W)\n",
"ts = fc.TimeSeries(data, \"W\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "0488a4d0-bca1-4341-9fae-1fd254adc0dc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TimeSeries([(datetime.datetime(2017, 1, 1, 0, 0), 1003.03),\n",
"\t (datetime.datetime(2017, 1, 8, 0, 0), 1002.71),\n",
"\t (datetime.datetime(2017, 1, 15, 0, 0), 1008.77)\n",
"\t ...\n",
"\t (datetime.datetime(2021, 12, 12, 0, 0), 1107.21),\n",
"\t (datetime.datetime(2021, 12, 19, 0, 0), 1106.66),\n",
"\t (datetime.datetime(2021, 12, 26, 0, 0), 1104.32)], frequency='W')"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "04624145-4fce-484c-aa69-0d17d159b598",
"metadata": {},
"outputs": [],
"source": [
"tst = ts.transform('Q', 'mean', False)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "75ed1666-5fc8-4707-bf42-62d44adcae18",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"20"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(tst)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "bccd7d1c-2d57-444c-af68-290f476f2b05",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(datetime.datetime(2017, 1, 1, 0, 0), 1010.4553846153846)\n",
"(datetime.datetime(2017, 4, 1, 0, 0), 1019.34)\n",
"(datetime.datetime(2017, 7, 1, 0, 0), 1015.3515384615384)\n",
"(datetime.datetime(2017, 10, 1, 0, 0), 1031.2892857142858)\n",
"(datetime.datetime(2018, 1, 1, 0, 0), 1054.7216666666666)\n",
"(datetime.datetime(2018, 4, 1, 0, 0), 1059.736153846154)\n",
"(datetime.datetime(2018, 7, 1, 0, 0), 1049.1100000000001)\n",
"(datetime.datetime(2018, 10, 1, 0, 0), 1051.663076923077)\n",
"(datetime.datetime(2019, 1, 1, 0, 0), 1062.2869230769231)\n",
"(datetime.datetime(2019, 4, 1, 0, 0), 1059.7423076923076)\n",
"(datetime.datetime(2019, 7, 1, 0, 0), 1050.7661538461539)\n",
"(datetime.datetime(2019, 10, 1, 0, 0), 1045.2061538461537)\n",
"(datetime.datetime(2020, 1, 1, 0, 0), 1046.11)\n",
"(datetime.datetime(2020, 4, 1, 0, 0), 1053.126923076923)\n",
"(datetime.datetime(2020, 7, 1, 0, 0), 1053.273846153846)\n",
"(datetime.datetime(2020, 10, 1, 0, 0), 1064.2384615384615)\n",
"(datetime.datetime(2021, 1, 1, 0, 0), 1073.1538461538462)\n",
"(datetime.datetime(2021, 4, 1, 0, 0), 1094.3215384615385)\n",
"(datetime.datetime(2021, 7, 1, 0, 0), 1104.3584615384616)\n",
"(datetime.datetime(2021, 10, 1, 0, 0), 1112.806923076923)\n"
]
}
],
"source": [
"for i in tst:\n",
" print(i)"
]
}
],
"metadata": {
"interpreter": {
"hash": "71e6a8e087576f7c2a714460e6ef0339bac111b70cc81e9aa980fde63219ab06"
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}