##################################################################################################
### Program: SimTrade_S&P500_returns_and_volatility_2025_12_23_SB                              ###
### Author: Saral Bindal                                                                       ###
### Contact: saralbindal.24@kgpian.iitkgp.ac.in                                                ###
### Article on the SimTrade blog: https://www.simtrade.fr/blog_simtrade/historical-volatility/ ###
##################################################################################################


# R program used to calculate the historical volatility of the S&P 500 index using daily data over the period 2020- 2025.

# Objectives of the program:
#   1) Compute daily arithmetic and logarithmic returns S&P 500 index (2020-2025) data and plot their empirical distribution.
#   2) Compute the first four statistical moments (mean, variance, skewness, and kurtosis) for both return measures.
#   3) Compute the annualized and rolling historical volatility and plot a chart for the same.


####################################################################################################
### Organization of the program:                                                                 ###
###   STEP 1: Environment setup and package loading                                              ###
###   STEP 2: Downloading market data                                                            ###
###   STEP 3: Computation and visualization of returns                                           ###
###   STEP 4: Computation and visualization of Historical volatility                             ###
####################################################################################################


####################################################################################################
### Parameters to change:                                                                        ###
###   Rolling window length for volatility estimation                                            ###
####################################################################################################


#################
# Documentation #
#################

# R packages 
# https://www.rdocumentation.org/packages/quantmod/versions/latest/topics/getSymbols
# https://www.rdocumentation.org/packages/xts/versions/latest/topics/index
# https://www.rdocumentation.org/packages/xts/versions/latest/topics/coredata
# https://www.rdocumentation.org/packages/dplyr/versions/latest/topics/select
# https://www.rdocumentation.org/packages/dplyr/versions/latest/topics/mutate
# https://www.rdocumentation.org/packages/dplyr/versions/latest/topics/lag
# https://www.rdocumentation.org/packages/stats/versions/latest/topics/na.omit
# https://www.rdocumentation.org/packages/ggplot2/versions/latest/topics/ggplot
# https://www.rdocumentation.org/packages/ggplot2/versions/latest/topics/geom_histogram
# https://www.rdocumentation.org/packages/ggplot2/versions/latest/topics/geom_line
# https://www.rdocumentation.org/packages/ggplot2/versions/latest/topics/labs
# https://www.rdocumentation.org/packages/ggplot2/versions/latest/topics/theme_minimal
# https://www.rdocumentation.org/packages/ggplot2/versions/latest/topics/theme
# https://www.rdocumentation.org/packages/ggplot2/versions/latest/topics/element_line
# https://www.rdocumentation.org/packages/moments/versions/latest/topics/skewness
# https://www.rdocumentation.org/packages/moments/versions/latest/topics/kurtosis
# https://www.rdocumentation.org/packages/zoo/versions/latest/topics/rollapply
# https://www.rdocumentation.org/packages/gridExtra/versions/latest/topics/grid.arrange

# Academic articles
# Bollerslev, T. (1986). Generalized Autoregressive Conditional Heteroskedasticity, Journal of Econometrics, 31(3), 307–327.
# Engle, R. F. (1982). Autoregressive Conditional Heteroscedasticity with Estimates of the Variance of United Kingdom Inflation, Econometrica, 50(4), 987–1007.
# Fama, E. F., & French, K. R. (2004). The Capital Asset Pricing Model: Theory and Evidence, Journal of Economic Perspectives, 18(3), 25–46.
# Heston, S. L. (1993). A Closed-Form Solution for Options with Stochastic Volatility with Applications to Bond and Currency Options, The Journal of Finance, 48(3), 1–24.
# Markowitz, H. M. (1952). Portfolio Selection, The Journal of Finance, 7(1), 77–91.
# Parkinson, M. (1980). The extreme value method for estimating the variance of the rate of return. Journal of Business, 53(1), 61–65.
# Sharpe, W. F. (1964). Capital Asset Prices: A Theory of Market Equilibrium under Conditions of Risk, The Journal of Finance, 19(3), 425–442.
# Tsay, R. S. (2010). Analysis of financial time series, John Wiley & Sons.


#################################################
# STEP 1: Environment setup and package loading #       
#################################################

# Language used by R to display error messages
Sys.setenv(LANG = "en")

# Remove all objects from the R environment
rm(list = ls())

# Clear the RStudio console
cat("\014")

# Package installation & loading
if (!require("quantmod")) install.packages("quantmod")
library(quantmod)     # Financial data download and time-series handling

if (!require("tidyverse")) install.packages("tidyverse")
library(tidyverse)    # Data manipulation and visualization

if (!require("moments")) install.packages("moments")
library(moments)      # Skewness and kurtosis computation

if (!require("zoo")) install.packages("zoo")
library(zoo)          # Rolling window calculations

if (!require("gridExtra")) install.packages("gridExtra")
library(gridExtra)    # Arrange multiple ggplot objects


######################################
# STEP 2: Downloading of market data #       
######################################

# Define the starting date (last 5 years)
start_date <- Sys.Date() - (365 * 5)

# Download S&P 500 data from Yahoo Finance
getSymbols("^GSPC",
           src = "yahoo",
           from = start_date,
           to = Sys.Date(),
           auto.assign = TRUE,
           yahoo_json = TRUE)

# Convert the downloaded xts object into a data frame
data <- data.frame(Date = index(GSPC), coredata(GSPC))

# Creation of new dataset with only dates and closing prices
data <- data %>%
  select(Date, Close = GSPC.Close)


####################################################
# STEP 3: Computation and visualization of returns #
####################################################

# Compute arithmetic and logarithmic daily returns
data <- data %>%
  mutate(
    Arithmetic_Return = Close / lag(Close) - 1,
    Log_Return = log(Close / lag(Close))
  ) %>%
  na.omit()  # Remove missing values created by lag


# Logarithmic returns
log_ret <- data$Log_Return

# Compute the basic statistics for returns
mean_log <- mean(log_ret)
variance_log <- var(log_ret)
skewness_log <- skewness(log_ret)
kurtosis_log <- kurtosis(log_ret) - 3  # Excess kurtosis
std_log <- sd(log_ret)
annualized_vol <- std_log * sqrt(252)

cat("For logarithmic returns:\n")
cat("Mean:", mean_log, "\n")
cat("Variance:", variance_log, "\n")
cat("Skewness:", skewness_log, "\n")
cat("Kurtosis:", kurtosis_log, "\n")
cat(sprintf("Standard Deviation / Daily Volatility: %.2f%%\n", std_log * 100))
cat(sprintf("Annualized Volatility: %.2f%%\n", annualized_vol * 100))

# Log-return distribution
p1 <- ggplot(data, aes(x = Log_Return * 100)) +
  geom_histogram(bins = 70, color = "black", fill = "skyblue", alpha = 0.7) +
  labs(title = "S&P 500 Daily Returns (Logarithmic)",
       x = "% Return",
       y = "Frequency") +
  theme_minimal() +
  theme(panel.grid.major = element_line(color = "gray", linetype = "dotted"))
cat("\n")



# Arithmetic returns
arith_ret <- data$Arithmetic_Return

# Compute statistics for arithmetic returns
mean_arith <- mean(arith_ret)
variance_arith <- var(arith_ret)
skewness_arith <- skewness(arith_ret)
kurtosis_arith <- kurtosis(arith_ret) - 3  # Excess kurtosis
std_arith <- sd(arith_ret)
annualized_vol_arith <- std_arith * sqrt(252)

cat("For arithmatic returns:\n")
cat("Mean:", mean_arith, "\n")
cat("Variance:", variance_arith, "\n")
cat("Skewness:", skewness_arith, "\n")
cat("Kurtosis:", kurtosis_arith, "\n")
cat(sprintf("Standard Deviation / Daily Volatility: %.2f%%\n", std_arith * 100))
cat(sprintf("Annualized Volatility: %.2f%%\n", annualized_vol_arith * 100))

# Arithmetic-return distribution
p2 <- ggplot(data, aes(x = Arithmetic_Return * 100)) +
  geom_histogram(bins = 70, color = "black", fill = "lightgreen", alpha = 0.7) +
  labs(title = "S&P 500 Daily Returns (Arithmetic)",
       x = "% Return",
       y = "Frequency") +
  theme_minimal() +
  theme(panel.grid.major = element_line(color = "gray", linetype = "dotted"))
cat("\n")

# Display both histograms side by side
grid.arrange(p1, p2, ncol = 2)


##################################################################
# STEP 4: Computation and visualization of Historical volatility #
##################################################################

# Historical volatility

# Function to compute annualized historical volatility over x months
x_month_hv <- function(x, return_series) {
  days <- as.integer(x * 21)     # Approximate trading days per month
  ret_window <- tail(return_series, days)
  daily_vol <- sd(ret_window)
  hv <- daily_vol * sqrt(252)
  return(hv * 100)
}

# Historical volatility using logarithmic returns
cat("For logarithmic returns:\n")
cat(sprintf("1-Month HV: %.2f%%\n", x_month_hv(1, data$Log_Return)))
cat(sprintf("3-Month HV: %.2f%%\n", x_month_hv(3, data$Log_Return)))
cat(sprintf("6-Month HV: %.2f%%\n", x_month_hv(6, data$Log_Return)))
cat("\n")
# Historical volatility using arithmetic returns
cat("For arithmatic returns:\n")
cat(sprintf("1-Month HV: %.2f%%\n", x_month_hv(1, data$Arithmetic_Return)))
cat(sprintf("3-Month HV: %.2f%%\n", x_month_hv(3, data$Arithmetic_Return)))
cat(sprintf("6-Month HV: %.2f%%\n", x_month_hv(6, data$Arithmetic_Return)))



# Rolling volatility

# Function to compute and plot rolling historical volatility
historical_volatility <- function(log_returns, dates = NULL, months = 1, trading_days = 21, plot = TRUE) {
  window <- as.integer(months * trading_days)
  rolling_hv <- rollapply(log_returns, width = window, FUN = sd, fill = NA, align = "right") * sqrt(252) * 100
  df <- data.frame(Log_Return = log_returns, Rolling_HV = rolling_hv)
  if (!is.null(dates)) {
    df$Date <- dates
  } else {
    df$Date <- 1:nrow(df)
  }
  
  if (plot) {
    p <- ggplot(df, aes(x = Date, y = Rolling_HV)) +
      geom_line(color = "#1f77b4") +
      labs(title = sprintf("%d-Month Historical Volatility (Annualized)", months),
           x = "Date",
           y = "Volatility (%)") +
      theme_minimal() +
      theme(panel.grid.major = element_line(color = "gray", linetype = "dotted"))
    
    print(p)
  }
  
  return(df)
}

# Compute and plot 3-month rolling historical volatility
vol_data <- historical_volatility(data$Log_Return, dates = data$Date, months = 3)


# End of the code
