#################################################################################################
### Program: SimTrade_Option_Implied_Risk_Neutral_Density_Extraction_R_Code                   ###
### Author: Saral Bindal                                                                      ###
### Contact: saralbindal.24@kgpian.iitkgp.ac.in                                               ###
### Article on the SimTrade blog:                                                             ###
#################################################################################################


# R program used to recover the implied risk-neutral probability density from European call option prices using the Black-Scholes framework, implied volatility estimation, and cubic spline interpolation.

# Objectives of the program:
#   1) Estimate implied volatilities from simulated market option prices
#   2) Construct a smooth implied volatility smile using cubic spline interpolation
#   3) Recover the option-implied risk-neutral density using the Breeden-Litzenberger approach


##################################################################################################
### Organization of the program:                                                               ###
###   STEP 1: Environment setup and package initialization                                     ###
###   STEP 2: Simulate option chain data                                                       ###
###   STEP 3: Preprocess option market data                                                    ###
###   STEP 4: Black-Scholes pricing and implied volatility estimation                          ###
###   STEP 5: Cubic spline interpolation of implied volatility smile                           ###
###   STEP 6: Recovery of implied risk-neutral density                                         ###
###   STEP 7: Visualization of implied volatility smile and risk-neutral density               ###
##################################################################################################


#################
# Documentation #
#################

# R packages
# https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/splinefun
# https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/uniroot
# https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/Normal
# https://ggplot2.tidyverse.org/
# https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/diff

# Academic articles
# Black, F., & Scholes, M. (1973). The pricing of options and corporate liabilities. Journal of Political Economy, 81(3), 637-654.
# Merton, R. C. (1973). Theory of rational option pricing. Bell Journal of Economics and Management Science, 4(1), 141-183.
# Breeden, D. T., & Litzenberger, R. H. (1978). Prices of state-contingent claims implicit in option prices. Journal of Business, 51(4), 621-651.
# Dumas, B., Fleming, J., & Whaley, R. E. (1998). Implied volatility functions: Empirical tests. Journal of Finance, 53(6), 2059-2106.
# Ait-Sahalia, Y., & Lo, A. W. (1998). Nonparametric estimation of state-price densities implicit in financial asset prices. Journal of Finance, 53(2), 499-547.
# Shimko, D. (1993). Bounds of probability. Risk, 6(4), 33-37.
# Jackwerth, J. C., & Rubinstein, M. (1996). Recovering probability distributions from option prices. Journal of Finance, 51(5), 1611-1631.


########################################################
# STEP 1: Environment setup and package initialization #
########################################################

install_if_missing <- function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

install_if_missing("ggplot2")
library(ggplot2)


########################################################
# STEP 2: Simulate option chain data                   #
########################################################

# Parameters
S <- 5300
r <- 0.052
T <- 30 / 365

# Black-Scholes call pricer
bs_call <- function(S, K, T, r, sigma) {
  d1 <- (log(S / K) + (r + 0.5 * sigma^2) * T) / (sigma * sqrt(T))
  d2 <- d1 - sigma * sqrt(T)
  return(S * pnorm(d1) - K * exp(-r * T) * pnorm(d2))
}

# Hidden volatility smile
volatility_smile <- function(K, S) {
  x     <- log(K / S)
  sigma <- 0.18 - 0.22 * x + 1.10 * x^2 + 2.5 * pmax(-x, 0)^3
  return(max(sigma, 0.08))
}

# Bid-ask spread model
bid_ask_spread <- function(option_price, K, S, T) {
  moneyness     <- abs(K - S) / S
  liquidity_adj <- 0.015 + 0.08 * moneyness^1.2
  time_adj      <- 1 + 0.25 * exp(-12 * T)
  spread        <- option_price * liquidity_adj * time_adj
  min_tick      <- ifelse(option_price < 3, 0.05, 0.10)
  return(max(spread, min_tick))
}

# Strike grid
strikes <- seq(
  round(S * 0.80 / 25) * 25,
  round(S * 1.20 / 25) * 25,
  by = 25
)

# Fix seed immediately before random number consumption
set.seed(42)

# Generate simulated call option chain
rows <- list()

for (i in seq_along(strikes)) {
  K     <- strikes[i]
  sigma <- volatility_smile(K, S)
  call_theoretical <- bs_call(S, K, T, r, sigma)
  
  spread <- bid_ask_spread(call_theoretical, K, S, T)
  bid    <- round(max(0.01, call_theoretical - spread / 2), 2)
  ask    <- round(call_theoretical + spread / 2, 2)
  
  # Deterministic: exact mid, no tick noise
  last_price <- round((bid + ask) / 2, 2)
  
  # Deterministic: zero daily change
  change <- 0.00
  
  # Deterministic: base volume and OI, no scaling
  moneyness     <- (K - S) / S
  volume        <- as.integer(max(100, as.integer(8000  * exp(-6 * moneyness^2))))
  open_interest <- as.integer(max(200, as.integer(30000 * exp(-5 * moneyness^2))))
  
  rows[[i]] <- data.frame(
    strike       = round(K, 2),
    lastPrice    = last_price,
    change       = change,
    bid          = bid,
    ask          = ask,
    volume       = volume,
    openInterest = open_interest
  )
}

option_chain <- do.call(rbind, rows)

print("===================================================")
print(" SIMULATED CALL OPTION CHAIN")
print("===================================================")
print(option_chain)


##################################################
# STEP 3: Preprocess option market data          #
##################################################

# Compute bid-ask midpoint prices
option_chain$mid_price <- (option_chain$bid + option_chain$ask) / 2

# Retain only actively traded contracts
option_chain <- option_chain[option_chain$volume != 0, ]

# Market parameters
spot_price       <- 5300
risk_free_rate   <- 0.052
time_to_maturity <- 30 / 365

# Discount factor used in arbitrage bounds
discount_factor <- exp(-risk_free_rate * time_to_maturity)

# Lower arbitrage bound for call prices
lower_bound <- pmax(0, spot_price - option_chain$strike * discount_factor)

# Upper arbitrage bound for call prices
upper_bound <- spot_price

# Remove contracts violating no-arbitrage conditions
option_chain <- option_chain[
  (option_chain$mid_price >= lower_bound) &
    (option_chain$mid_price <= upper_bound),
]

# Restrict strike range for stable interpolation
option_chain <- option_chain[
  (option_chain$strike > 4000) &
    (option_chain$strike < 6000),
]

# Sort option contracts by strike price
option_chain <- option_chain[order(option_chain$strike), ]


###################################################################
# STEP 4: Black-Scholes pricing and implied volatility estimation #
###################################################################

# Black-Scholes pricing formula for European call options
black_scholes_call_price <- function(S, K, T, r, sigma) {
  d1 <- (log(S / K) + (r + 0.5 * sigma^2) * T) / (sigma * sqrt(T))
  d2 <- d1 - sigma * sqrt(T)
  return(S * pnorm(d1) - K * exp(-r * T) * pnorm(d2))
}

# Numerical implied volatility solver
implied_volatility <- function(market_price, S, K, T, r) {
  objective_function <- function(sigma) {
    black_scholes_call_price(S, K, T, r, sigma) - market_price
  }
  result <- tryCatch(
    uniroot(objective_function, lower = 1e-6, upper = 5.0)$root,
    error = function(e) NA_real_
  )
  return(result)
}

# Estimate implied volatility for each option contract
option_chain$implied_volatility <- mapply(
  function(mid_price, strike) {
    implied_volatility(
      mid_price,
      spot_price,
      strike,
      time_to_maturity,
      risk_free_rate
    )
  },
  option_chain$mid_price,
  option_chain$strike
)

# Remove invalid implied volatility estimates
option_chain <- option_chain[!is.na(option_chain$implied_volatility), ]


##########################################################
# STEP 5: Cubic spline interpolation of IV smile         #
##########################################################

# Dense strike grid for interpolation
strike_grid <- seq(min(option_chain$strike), max(option_chain$strike), length.out = 2000)

# Natural cubic spline interpolation of implied volatility
volatility_spline <- splinefun(option_chain$strike, option_chain$implied_volatility, method = "natural")

# Interpolated implied volatility values
smoothed_volatility <- volatility_spline(strike_grid)


##########################################################
# STEP 6: Recovery of implied risk-neutral density       #
##########################################################

# Generate smooth option prices across strike grid
smoothed_call_prices <- mapply(
  function(strike, volatility) {
    black_scholes_call_price(
      spot_price,
      strike,
      time_to_maturity,
      risk_free_rate,
      volatility
    )
  },
  strike_grid,
  smoothed_volatility
)

# First derivative of option prices with respect to strike
dk               <- diff(strike_grid)[1]
first_derivative <- diff(smoothed_call_prices) / dk
first_derivative <- c(first_derivative, tail(first_derivative, 1))

# Second derivative used in density extraction
second_derivative <- diff(first_derivative) / dk
second_derivative <- c(second_derivative, tail(second_derivative, 1))

# Breeden-Litzenberger risk-neutral density formula
risk_neutral_density <- exp(risk_free_rate * time_to_maturity) * second_derivative

# Remove negative numerical artifacts
risk_neutral_density <- pmax(risk_neutral_density, 0)


##########################################################
# STEP 7: Visualization of IV smile and RND              #
##########################################################

# Plot observed and interpolated implied volatility smile
iv_df     <- data.frame(strike = option_chain$strike, iv = option_chain$implied_volatility * 100)
spline_df <- data.frame(strike = strike_grid, iv = smoothed_volatility * 100)

print(
  ggplot() +
    geom_point(data = iv_df,     aes(x = strike, y = iv), color = "steelblue", size = 2, alpha = 0.7) +
    geom_line( data = spline_df, aes(x = strike, y = iv), color = "darkorange", linewidth = 1) +
    labs(
      x     = "Strike Price",
      y     = "Implied Volatility (%)",
      title = "Implied Volatility Smile"
    ) +
    theme_bw() +
    theme(
      panel.grid       = element_line(color = "grey90"),
      plot.title       = element_text(hjust = 0.5)
    )
)

# Plot recovered implied risk-neutral density
rnd_df <- data.frame(strike = strike_grid, density = risk_neutral_density)

print(
  ggplot(rnd_df, aes(x = strike, y = density)) +
    geom_line(color = "steelblue", linewidth = 1) +
    labs(
      x     = "Terminal Asset Price",
      y     = "Probability Density",
      title = "Option-Implied Risk-Neutral Density"
    ) +
    theme_bw() +
    theme(
      panel.grid       = element_line(color = "grey90"),
      plot.title       = element_text(hjust = 0.5)
    )
)


# End of the code
