#################################################################################################
### Program: SimTrade_Option_Implied_Risk_Neutral_Density_Extraction_Python_Code              ###
### Author: Saral Bindal                                                                      ###
### Contact: saralbindal.24@kgpian.iitkgp.ac.in                                               ###
### Article on the SimTrade blog:                                                             ###
#################################################################################################


# Python program used to recover the implied risk-neutral probability density
# from European call option prices using the Black-Scholes framework,
# implied volatility estimation, and cubic spline interpolation.

# Objectives of the program:
#   1) Estimate implied volatilities from simulated market option prices
#   2) Construct a smooth implied volatility smile using cubic spline interpolation
#   3) Recover the option-implied risk-neutral density using the Breeden-Litzenberger approach


##################################################################################################
### Organization of the program:                                                               ###
###   STEP 1: Environment setup and package initialization                                     ###
###   STEP 2: Simulate option chain data                                                       ###
###   STEP 3: Load and preprocess option market data                                           ###
###   STEP 4: Black-Scholes pricing and implied volatility estimation                          ###
###   STEP 5: Cubic spline interpolation of implied volatility smile                           ###
###   STEP 6: Recovery of implied risk-neutral density                                         ###
###   STEP 7: Visualization of implied volatility smile and risk-neutral density               ###
##################################################################################################


#################
# Documentation #
#################

# Python packages
# https://numpy.org/doc/
# https://pandas.pydata.org/docs/
# https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.html
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.CubicSpline.html
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.brentq.html
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html

# Academic articles
# Black, F., & Scholes, M. (1973). The pricing of options and corporate liabilities. Journal of Political Economy, 81(3), 637–654.
# Merton, R. C. (1973). Theory of rational option pricing. Bell Journal of Economics and Management Science, 4(1), 141–183.
# Breeden, D. T., & Litzenberger, R. H. (1978). Prices of state-contingent claims implicit in option prices. Journal of Business, 51(4), 621–651.
# Dumas, B., Fleming, J., & Whaley, R. E. (1998). Implied volatility functions: Empirical tests. Journal of Finance, 53(6), 2059–2106.
# Aït-Sahalia, Y., & Lo, A. W. (1998). Nonparametric estimation of state-price densities implicit in financial asset prices. Journal of Finance, 53(2), 499–547.
# Shimko, D. (1993). Bounds of probability. Risk, 6(4), 33–37.
# Jackwerth, J. C., & Rubinstein, M. (1996). Recovering probability distributions from option prices. Journal of Finance, 51(5), 1611–1631.


########################################################
# STEP 1: Environment setup and package initialization #
########################################################

import sys
import subprocess

# Automatically install missing Python packages
def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])


# Numerical computing package
try:
    import numpy as np
except ImportError:
    install("numpy")
    import numpy as np


# Data analysis and manipulation package
try:
    import pandas as pd
except ImportError:
    install("pandas")
    import pandas as pd


# Visualization and plotting package
try:
    import matplotlib.pyplot as plt
except ImportError:
    install("matplotlib")
    import matplotlib.pyplot as plt


# Scientific computing and statistical functions
try:
    from scipy.interpolate import CubicSpline
    from scipy.optimize import brentq
    from scipy.stats import norm

except ImportError:

    install("scipy")

    from scipy.interpolate import CubicSpline
    from scipy.optimize import brentq
    from scipy.stats import norm


########################################################
# STEP 2: Simulate option chain data                   #
########################################################

# Parameters
S = 5300
r = 0.052
T = 30 / 365

# Black-Scholes call pricer
def bs_call(S, K, T, r, sigma):
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)

# Hidden volatility smile
def volatility_smile(K, S):
    x     = np.log(K / S)
    sigma = 0.18 - 0.22 * x + 1.10 * x**2 + 2.5 * max(-x, 0)**3
    return max(sigma, 0.08)

# Bid-ask spread model
def bid_ask_spread(option_price, K, S, T):
    moneyness     = abs(K - S) / S
    liquidity_adj = 0.015 + 0.08 * moneyness**1.2
    time_adj      = 1 + 0.25 * np.exp(-12 * T)
    spread        = option_price * liquidity_adj * time_adj
    min_tick      = 0.05 if option_price < 3 else 0.10
    return max(spread, min_tick)

# Strike grid
strike_low  = round(S * 0.80 / 25) * 25
strike_high = round(S * 1.20 / 25) * 25
strikes     = np.arange(strike_low, strike_high + 25, 25)

# Fix seed immediately before random number consumption
rng = np.random.default_rng(42)

# Generate simulated call option chain
rows = []

for K in strikes:
    sigma            = volatility_smile(K, S)
    call_theoretical = bs_call(S, K, T, r, sigma)

    spread     = bid_ask_spread(call_theoretical, K, S, T)
    bid        = round(max(0.01, call_theoretical - spread / 2), 2)
    ask        = round(call_theoretical + spread / 2, 2)

    # Deterministic: exact mid, no tick noise
    last_price = round((bid + ask) / 2, 2)

    # Deterministic: zero daily change
    change     = 0.00

    # Deterministic: base volume and OI, no scaling
    moneyness     = (K - S) / S
    volume        = int(max(100, int(8000  * np.exp(-6 * moneyness**2))))
    open_interest = int(max(200, int(30000 * np.exp(-5 * moneyness**2))))

    rows.append({
        "strike"       : round(float(K), 2),
        "lastPrice"    : last_price,
        "change"       : change,
        "bid"          : bid,
        "ask"          : ask,
        "volume"       : volume,
        "openInterest" : open_interest
    })

option_chain = pd.DataFrame(rows)

print("===================================================")
print(" SIMULATED CALL OPTION CHAIN")
print("===================================================")
print(option_chain.to_string(index=False))


##################################################
# STEP 3: Load and preprocess option market data #
##################################################

# Compute bid-ask midpoint prices
option_chain["mid_price"] = (option_chain["bid"] + option_chain["ask"]) / 2

# Retain only actively traded contracts
option_chain = option_chain[option_chain["volume"] != 0].copy()

# Market parameters
spot_price       = 5300
risk_free_rate   = 0.052
time_to_maturity = 30 / 365

# Discount factor used in arbitrage bounds
discount_factor = np.exp(-risk_free_rate * time_to_maturity)

# Lower arbitrage bound for call prices
lower_bound = np.maximum(0, spot_price - option_chain["strike"] * discount_factor)

# Upper arbitrage bound for call prices
upper_bound = spot_price

# Remove contracts violating no-arbitrage conditions
option_chain = option_chain[
    (option_chain["mid_price"] >= lower_bound) &
    (option_chain["mid_price"] <= upper_bound)
].copy()

# Restrict strike range for stable interpolation
option_chain = option_chain[
    (option_chain["strike"] > 4000) &
    (option_chain["strike"] < 6000)
].copy()

# Sort option contracts by strike price
option_chain = option_chain.sort_values("strike")


###################################################################
# STEP 4: Black-Scholes pricing and implied volatility estimation #
###################################################################

# Black-Scholes pricing formula for European call options
def black_scholes_call_price(S, K, T, r, sigma):
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    return (S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2))

# Numerical implied volatility solver
def implied_volatility(market_price, S, K, T, r):
    # Root-finding objective function
    def objective_function(sigma):
        return (black_scholes_call_price(S, K, T, r, sigma) - market_price)
    # Brent numerical optimization method
    try:
        return brentq(objective_function, 1e-6, 5.0)
    except:
        return np.nan

# Estimate implied volatility for each option contract
option_chain["implied_volatility"] = option_chain.apply(
    lambda row: implied_volatility(
        row["mid_price"],
        spot_price,
        row["strike"],
        time_to_maturity,
        risk_free_rate
    ),
    axis=1
)

# Remove invalid implied volatility estimates
option_chain = option_chain.dropna()


##########################################################
# STEP 5: Cubic spline interpolation of IV smile         #
##########################################################

# Dense strike grid for interpolation
strike_grid = np.linspace(option_chain["strike"].min(), option_chain["strike"].max(), 2000)

# Natural cubic spline interpolation of implied volatility
volatility_spline = CubicSpline(option_chain["strike"], option_chain["implied_volatility"], bc_type="natural")

# Interpolated implied volatility values
smoothed_volatility = volatility_spline(strike_grid)


##########################################################
# STEP 6: Recovery of implied risk-neutral density       #
##########################################################

# Generate smooth option prices across strike grid
smoothed_call_prices = np.array([
    black_scholes_call_price(
        spot_price,
        strike,
        time_to_maturity,
        risk_free_rate,
        volatility
    )
    for strike, volatility in zip(
        strike_grid,
        smoothed_volatility
    )
])

# First derivative of option prices with respect to strike
first_derivative = np.gradient(smoothed_call_prices, strike_grid)

# Second derivative used in density extraction
second_derivative = np.gradient(first_derivative, strike_grid)

# Breeden-Litzenberger risk-neutral density formula
risk_neutral_density = (np.exp(risk_free_rate * time_to_maturity) * second_derivative)

# Remove negative numerical artifacts
risk_neutral_density = np.maximum(risk_neutral_density, 0)


##########################################################
# STEP 7: Visualization of IV smile and RND              #
##########################################################

# Plot observed and interpolated implied volatility smile
plt.figure(figsize=(10, 6))
plt.scatter(option_chain["strike"], option_chain["implied_volatility"] * 100, label="Observed Implied Volatility")
plt.plot(strike_grid, smoothed_volatility * 100, linewidth=2, label="Cubic Spline Interpolation")
plt.xlabel("Strike Price")
plt.ylabel("Implied Volatility (%)")
plt.title("Implied Volatility Smile")
plt.legend()
plt.grid(True)
plt.show()

# Plot recovered implied risk-neutral density
plt.figure(figsize=(10, 6))
plt.plot(strike_grid, risk_neutral_density, linewidth=2)
plt.xlabel("Terminal Asset Price")
plt.ylabel("Probability Density")
plt.title("Option-Implied Risk-Neutral Density")
plt.grid(True)
plt.show()


# End of the code