ArcticDB_demo_equity_options
View in Github | Open in Google ColabUsing ArcticDB for equity options data: a worked example
A Sample Workflow for Equity Options
In this notebook we will:
- Download (from github) a set of market data for options on a group of tech stocks
- Store the data in ArcticDB using an incremental timeseries update workflow
- Create a range of useful option queries
- Use the queries to drive an interactive chart
Thank you to optiondata.org
We would like to express our gratitude to optiondata.org for giving us permission to use a small slice of their free data in this demo.
This data is free and available on their website for you to download yourself.
Installs and Imports¶
In [ ]:
Copied!
!pip install arcticdb
!pip install arcticdb
In [ ]:
Copied!
%matplotlib inline
import arcticdb as adb
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import ipywidgets as widgets
import functools
%matplotlib inline
import arcticdb as adb
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import ipywidgets as widgets
import functools
Parameters for the Notebook¶
In [3]:
Copied!
branch = 'master'
all_dates = ['2013-06-03', '2013-06-10', '2013-06-17', '2013-06-24']
delta_low = 0.05
delta_high = 0.55
branch = 'master'
all_dates = ['2013-06-03', '2013-06-10', '2013-06-17', '2013-06-24']
delta_low = 0.05
delta_high = 0.55
ArcticDB Setup¶
In [4]:
Copied!
arctic = adb.Arctic("lmdb://arcticdb_equity_options")
lib = arctic.get_library('demo_options', create_if_missing=True)
arctic = adb.Arctic("lmdb://arcticdb_equity_options")
lib = arctic.get_library('demo_options', create_if_missing=True)
Functions for Reading Github and ArcticDB Queries¶
In [12]:
Copied!
def gitub_url(as_of, branch):
return f"https://raw.githubusercontent.com/man-group/ArcticDB/{branch}/docs/mkdocs/docs/notebooks/data/{as_of}tech-options.csv"
# read the csv from github
def read_github_options_file(as_of, branch='master'):
try:
raw_df = pd.read_csv(gitub_url(as_of, branch))
except Exception as e:
raise Exception(f"Github access error: {e}")
return raw_df.set_index(pd.DatetimeIndex(raw_df['quote_date'])).drop(columns=['Unnamed: 0'])
def uly_symbol(uly):
return f"options/{uly}"
# query to get option expiries for an underlying
def options_expiries_query(lazy_df, as_of, underlying):
# exclude options expiring on as_of - no time value left
filter = (lazy_df['expiration'] != as_of)
lazy_df = lazy_df[filter].groupby('expiration').agg({'volume': 'sum'})
return lazy_df
def read_expiries(as_of, underlying):
read_date = pd.Timestamp(as_of)
sym = uly_symbol(underlying)
lazy_df = lib.read(sym, date_range=(read_date, read_date), lazy=True)
lazy_df = options_expiries_query(lazy_df, as_of, underlying)
return lazy_df.collect().data.sort_index().index.values
def read_all_underlyings():
# use the symbol list to get all underlyings
return sorted([s.split('/')[1] for s in lib.list_symbols()])
# query to get all options for an expiry
# as_of via date_range, uly via symbol
def options_curve_single_expiry_query(lazy_df, expiry):
filter = (lazy_df['expiration'] == expiry)
return lazy_df[filter]
# query to get all options for an expiry with delta in a specified interval
# calls have delta >= 0, puts have delta <= 0
def options_curve_single_expiry_delta_bracket_query(lazy_df, expiry, delta_low, delta_high):
filter = ((lazy_df['expiration'] == expiry) &
(abs(lazy_df['delta']) >= delta_low) &
(abs(lazy_df['delta']) <= delta_high)
)
return lazy_df[filter].groupby('strike').agg({'implied_volatility': 'mean'})
def read_vol_curve_single_expiry(as_of, underlying, expiry):
read_date = pd.Timestamp(as_of)
sym = uly_symbol(underlying)
lazy_df = lib.read(sym, date_range=(read_date, read_date), lazy=True)
lazy_df = options_curve_single_expiry_query(lazy_df, expiry)
return lazy_df.collect().data
@functools.cache
def read_vol_curve_single_expiry_exclude_itm_otm(as_of, underlying, expiry, delta_low, delta_high):
read_date = pd.Timestamp(as_of)
sym = uly_symbol(underlying)
lazy_df = lib.read(sym, date_range=(read_date, read_date), lazy=True)
lazy_df = options_curve_single_expiry_delta_bracket_query(lazy_df, expiry, delta_low, delta_high)
vol_curve_raw = lazy_df.collect().data
return vol_curve_raw.sort_index()*100
def gitub_url(as_of, branch):
return f"https://raw.githubusercontent.com/man-group/ArcticDB/{branch}/docs/mkdocs/docs/notebooks/data/{as_of}tech-options.csv"
# read the csv from github
def read_github_options_file(as_of, branch='master'):
try:
raw_df = pd.read_csv(gitub_url(as_of, branch))
except Exception as e:
raise Exception(f"Github access error: {e}")
return raw_df.set_index(pd.DatetimeIndex(raw_df['quote_date'])).drop(columns=['Unnamed: 0'])
def uly_symbol(uly):
return f"options/{uly}"
# query to get option expiries for an underlying
def options_expiries_query(lazy_df, as_of, underlying):
# exclude options expiring on as_of - no time value left
filter = (lazy_df['expiration'] != as_of)
lazy_df = lazy_df[filter].groupby('expiration').agg({'volume': 'sum'})
return lazy_df
def read_expiries(as_of, underlying):
read_date = pd.Timestamp(as_of)
sym = uly_symbol(underlying)
lazy_df = lib.read(sym, date_range=(read_date, read_date), lazy=True)
lazy_df = options_expiries_query(lazy_df, as_of, underlying)
return lazy_df.collect().data.sort_index().index.values
def read_all_underlyings():
# use the symbol list to get all underlyings
return sorted([s.split('/')[1] for s in lib.list_symbols()])
# query to get all options for an expiry
# as_of via date_range, uly via symbol
def options_curve_single_expiry_query(lazy_df, expiry):
filter = (lazy_df['expiration'] == expiry)
return lazy_df[filter]
# query to get all options for an expiry with delta in a specified interval
# calls have delta >= 0, puts have delta <= 0
def options_curve_single_expiry_delta_bracket_query(lazy_df, expiry, delta_low, delta_high):
filter = ((lazy_df['expiration'] == expiry) &
(abs(lazy_df['delta']) >= delta_low) &
(abs(lazy_df['delta']) <= delta_high)
)
return lazy_df[filter].groupby('strike').agg({'implied_volatility': 'mean'})
def read_vol_curve_single_expiry(as_of, underlying, expiry):
read_date = pd.Timestamp(as_of)
sym = uly_symbol(underlying)
lazy_df = lib.read(sym, date_range=(read_date, read_date), lazy=True)
lazy_df = options_curve_single_expiry_query(lazy_df, expiry)
return lazy_df.collect().data
@functools.cache
def read_vol_curve_single_expiry_exclude_itm_otm(as_of, underlying, expiry, delta_low, delta_high):
read_date = pd.Timestamp(as_of)
sym = uly_symbol(underlying)
lazy_df = lib.read(sym, date_range=(read_date, read_date), lazy=True)
lazy_df = options_curve_single_expiry_delta_bracket_query(lazy_df, expiry, delta_low, delta_high)
vol_curve_raw = lazy_df.collect().data
return vol_curve_raw.sort_index()*100
Read Data from Github and Store in ArcticDB¶
In [13]:
Copied!
for d in all_dates:
df = read_github_options_file(d, branch)
underlyings = df['underlying'].unique()
print(f"Date: {d} - {len(df)} records, {len(underlyings)} underlyings")
for u in underlyings:
uly_df = df[df['underlying']==u]
sym = uly_symbol(u)
# upsert option creates the symbol if it doesn't already exist
lib.update(sym, uly_df, upsert=True)
for d in all_dates:
df = read_github_options_file(d, branch)
underlyings = df['underlying'].unique()
print(f"Date: {d} - {len(df)} records, {len(underlyings)} underlyings")
for u in underlyings:
uly_df = df[df['underlying']==u]
sym = uly_symbol(u)
# upsert option creates the symbol if it doesn't already exist
lib.update(sym, uly_df, upsert=True)
Date: 2013-06-03 - 6792 records, 11 underlyings Date: 2013-06-10 - 6622 records, 11 underlyings Date: 2013-06-17 - 6442 records, 11 underlyings Date: 2013-06-24 - 6134 records, 11 underlyings
In [14]:
Copied!
# lets take a look at the symbols we have created - one for each underlying
lib.list_symbols()
# lets take a look at the symbols we have created - one for each underlying
lib.list_symbols()
Out[14]:
['options/IBM', 'options/NVDA', 'options/MSFT', 'options/AAPL', 'options/CSCO', 'options/ASML', 'options/TSM', 'options/ADBE', 'options/AMZN', 'options/ORCL', 'options/TSLA']
In [15]:
Copied!
all_uly = read_all_underlyings()
all_uly
all_uly = read_all_underlyings()
all_uly
Out[15]:
['AAPL', 'ADBE', 'AMZN', 'ASML', 'CSCO', 'IBM', 'MSFT', 'NVDA', 'ORCL', 'TSLA', 'TSM']
Functions for Creating the Charts and Simple Interactive Controls¶
In [16]:
Copied!
def create_vol_curve_chart_single(ax, as_of, uly, delta_low, delta_high):
exp = read_expiries(as_of, uly)
cmap = plt.get_cmap('rainbow', len(exp))
format_kw = {'linewidth': 2, 'alpha': 0.85}
for i, e in enumerate(exp):
vol_curve = read_vol_curve_single_expiry_exclude_itm_otm(as_of, uly, e, delta_low, delta_high)
vol_curve.plot(ax=ax, y='implied_volatility', label=e, grid=True, color=cmap(i), **format_kw)
ax.set_title(f"Option Volatility Curves for {uly}, as of {as_of}")
ax.set_ylabel("implied volatility")
ax.legend(loc='upper right', framealpha=0.7)
ax.set_facecolor('whitesmoke')
def create_vol_curve_chart(as_of1, uly1, as_of2, uly2, delta_low, delta_high):
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))
create_vol_curve_chart_single(ax1, as_of1, uly1, delta_low, delta_high)
create_vol_curve_chart_single(ax2, as_of2, uly2, delta_low, delta_high)
class input_widgets(object):
def __init__(self):
self.container = widgets.VBox()
self.create_widgets()
self.redraw_chart()
@property
def as_of1(self):
return self.container.children[0].children[0].value
@property
def uly1(self):
return self.container.children[0].children[1].value
@property
def as_of2(self):
return self.container.children[1].children[0].value
@property
def uly2(self):
return self.container.children[1].children[1].value
@property
def out(self):
return self.container.children[2]
def create_widgets(self):
self.as_of1_dd = widgets.Dropdown(
options=all_dates,
value=all_dates[0],
description='Date1:',
disabled=False,
)
self.as_of1_dd.observe(self._on_change, ['value'])
self.as_of2_dd = widgets.Dropdown(
options=all_dates,
value=all_dates[0],
description='Date2:',
disabled=False,
)
self.as_of2_dd.observe(self._on_change, ['value'])
self.uly1_dd = widgets.Dropdown(
options=all_uly,
value=all_uly[0],
description='Underlying1:',
disabled=False,
)
self.uly1_dd.observe(self._on_change, ['value'])
self.uly2_dd = widgets.Dropdown(
options=all_uly,
value=all_uly[1],
description='Underlying2:',
disabled=False,
)
self.uly2_dd.observe(self._on_change, ['value'])
self.output_widget = widgets.Output(layout=widgets.Layout(height='900px'))
self.container.children = [
widgets.HBox([self.as_of1_dd, self.uly1_dd]),
widgets.HBox([self.as_of2_dd, self.uly2_dd]),
self.output_widget
]
def _on_change(self, _):
self.redraw_chart()
def redraw_chart(self):
with self.output_widget:
self.output_widget.clear_output(wait=True)
create_vol_curve_chart(self.as_of1, self.uly1, self.as_of2, self.uly2, delta_low, delta_high)
plt.show()
def create_vol_curve_chart_single(ax, as_of, uly, delta_low, delta_high):
exp = read_expiries(as_of, uly)
cmap = plt.get_cmap('rainbow', len(exp))
format_kw = {'linewidth': 2, 'alpha': 0.85}
for i, e in enumerate(exp):
vol_curve = read_vol_curve_single_expiry_exclude_itm_otm(as_of, uly, e, delta_low, delta_high)
vol_curve.plot(ax=ax, y='implied_volatility', label=e, grid=True, color=cmap(i), **format_kw)
ax.set_title(f"Option Volatility Curves for {uly}, as of {as_of}")
ax.set_ylabel("implied volatility")
ax.legend(loc='upper right', framealpha=0.7)
ax.set_facecolor('whitesmoke')
def create_vol_curve_chart(as_of1, uly1, as_of2, uly2, delta_low, delta_high):
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))
create_vol_curve_chart_single(ax1, as_of1, uly1, delta_low, delta_high)
create_vol_curve_chart_single(ax2, as_of2, uly2, delta_low, delta_high)
class input_widgets(object):
def __init__(self):
self.container = widgets.VBox()
self.create_widgets()
self.redraw_chart()
@property
def as_of1(self):
return self.container.children[0].children[0].value
@property
def uly1(self):
return self.container.children[0].children[1].value
@property
def as_of2(self):
return self.container.children[1].children[0].value
@property
def uly2(self):
return self.container.children[1].children[1].value
@property
def out(self):
return self.container.children[2]
def create_widgets(self):
self.as_of1_dd = widgets.Dropdown(
options=all_dates,
value=all_dates[0],
description='Date1:',
disabled=False,
)
self.as_of1_dd.observe(self._on_change, ['value'])
self.as_of2_dd = widgets.Dropdown(
options=all_dates,
value=all_dates[0],
description='Date2:',
disabled=False,
)
self.as_of2_dd.observe(self._on_change, ['value'])
self.uly1_dd = widgets.Dropdown(
options=all_uly,
value=all_uly[0],
description='Underlying1:',
disabled=False,
)
self.uly1_dd.observe(self._on_change, ['value'])
self.uly2_dd = widgets.Dropdown(
options=all_uly,
value=all_uly[1],
description='Underlying2:',
disabled=False,
)
self.uly2_dd.observe(self._on_change, ['value'])
self.output_widget = widgets.Output(layout=widgets.Layout(height='900px'))
self.container.children = [
widgets.HBox([self.as_of1_dd, self.uly1_dd]),
widgets.HBox([self.as_of2_dd, self.uly2_dd]),
self.output_widget
]
def _on_change(self, _):
self.redraw_chart()
def redraw_chart(self):
with self.output_widget:
self.output_widget.clear_output(wait=True)
create_vol_curve_chart(self.as_of1, self.uly1, self.as_of2, self.uly2, delta_low, delta_high)
plt.show()
Interactive Vol Curves for Side By Side Viewing of 2 Dates/Stocks¶
- Click the dropdowns and the charts will update
- The data is read from ArcticDB as part of the chart creation
In [17]:
Copied!
w = input_widgets()
w.container
w = input_widgets()
w.container
Out[17]:
VBox(children=(HBox(children=(Dropdown(description='Date1:', options=('2013-06-03', '2013-06-10', '2013-06-17'…
For the notebook preview, the chart and widgets will look like this image
Conclusions¶
- We have demonstrated a simple data pipeline for capture and storage of options data
- We created a useful series of queries on the data
- These queries are used to build interactive charts
- The result is a simple visual browser for option volatility curves
- In a real system, the data collection and storage would probably be a separate process from the visualisation
- The simplicity and flexibility of ArcticDB made the data handling the easy part, compared to getting the charts and interactive widgets looking nice