########################################################## ### Program: Risk_measures_analysis_2023_01_08_FL_V1 ##### ########################################################## # Authors: Francois LONGIN and Shengyu ZHENG (ESSEC Business School) # Objective of this file: estimate different risk measures for financial assets # Structure of this file: # Step 1: definition of the input and output files # Step 2: reading of input files (data) # Step 3: computation of basic statistics # Step 4: definition of parameters for risk measures # Step 5: computation of risk measures # Step 6: production of output files # Language used by R to display the error messages Sys.setenv(LANG = "en") # Suppression of all data in the R memory rm(list=ls()) # Clear the console in RStudio cat("\014") # Install required R packages if need if (!require(rstudioapi)) install.packages('rstudioapi') if (!require(lubridate)) install.packages('lubridate') if (!require(dplyr)) install.packages('dplyr') if (!require(quantmod)) install.packages('quantmod') if (!require(moments)) install.packages('moments') if (!require(MASS)) install.packages('MASS') if (!require(POT)) install.packages('POT') if (!require(cvar)) install.packages('cvar') # R packages used in the R code library(rstudioapi) # R function from the rstudioapi to get the name of this R program library(lubridate) #formatting dates library(dplyr) #data manipulation library(quantmod) #retrieving financial data from Yahoo Finance library(moments) # Descriptive Statistics library(MASS) #fit normal distribution library(POT) # Peak over threshold (POT) approach: estimation of the Generalized Pareto distribution (GDP) library(cvar) # Calculation of expected shortfall or stress value based on a theoretical distribution # Definition of the working directory # Adapt the path to the working directory according to the location of the files on your computer and the type of computer (PC/Mac) wd = "G:/1. SimTrade/0. Blog SimTrade/1. Billets en cours de redaction/2022-05 Shengyu ZHENG/Z Risk estimation with R/1. Program/Latest version" # For Francois Longin (PC) # Setting the working directory for the R code setwd(wd) # ************************************************************ # ***** Step 1: definition of the input and output files ***** # ************************************************************ # *** Definition of the names of the output files *** # Output file for the execution of the program date_execution = format(Sys.time(), "%Y_%m_%d_%H_%M") # Date (and time) of execution of the program (format YYYY_MM_DD_HH_MM) file_output1 = paste("3_Results_Risk_measures_analysis_Execution_",date_execution,".csv", sep = "") #file_output1 = paste("3_Results_Risk_measures_analysis_Execution_",date_execution,".csv", sep = "") # Use this if codes are run on R Studio Cloud # Output file for the estimation results of risk measures date_estimation <- as.POSIXct(today(), format = "%d/%m/%Y") file_output2 = paste("Output_risk_measures_",date_execution,".csv", sep = "") # Output file for the data downloaded (from Yahoo Finance for example) file_output3 = paste("Data_",date_execution,".csv", sep = "") # *** Information for the execution file *** # R function from the rstudioapi to get the name of this R program RFileName <- rstudioapi::getActiveDocumentContext()$path a <- paste("Execution of the R program: ", RFileName) write(a, file=file_output1) write("", file=file_output1, append = TRUE) # Starting time of the R program starting_time = Sys.time() a <- paste("Starting time of the R program:", starting_time) write(a, file=file_output1, append = TRUE) # Writing in the same file (use of "Append = TRUE") write("", file=file_output1, append = TRUE) # Version of R used in this program use the 'R.version' variable a <- paste("R version used to execute the program: ", R.version$version.string) write(a, file=file_output1, append = TRUE) write("", file=file_output1, append = TRUE) # Data frame to store all estimation results of risk measures risk_measures <- data.frame(matrix(ncol = 35, nrow = 0)) colnames(risk_measures) <- c("AssetClass", "Instrument", "Mean", "Median", "StandardDeviation", "Variance", "Skewness", "Kurtosis", "RM1_HistoricalVol", "RM1_StartDate", "RM1_EndDate", "RM1_NObs", "RM2_3MVol", "RM2_StartDate", "RM2_EndDate", "RM2_NObs", "RM3_VaR_historical_left", "RM3_VaR_historical_right", "RM3_VaR_normal_right", "RM3_VaR_normal_left", "RM3_StartDate", "RM3_EndDate", "RM3_NObs", "RM4_ES_historical_left", "RM4_ES_historical_right", "RM4_ES_normal_left", "RM4_ES_normal_right", "RM4_StartDate", "RM4_EndDate", "RM4_NObs", "RM5_SV_gpd_left", "RM5_SV_gpd_right", "RM5_StartDate", "RM5_EndDate", "RM5_NObs") # Import of the Yahoo Finance tickers # To know the starting date of the time-series on Yahoo Finance, click on "max" or enter 01/01/1900 in the csv file and check the date in the output file # Choice between two approaches #choice_ticker <- 1 # The user enters the Yahoo Finance tickers as an input choice_ticker <- 2 # The Yahoo Finance tickers are read from an external file if (choice_ticker == 1) { # The Yahoo Finance tickers are entered by the user as an input Yahoo_Finance_Tickers <- data.frame(matrix(ncol = 5, nrow = 0)) colnames(Yahoo_Finance_Tickers) <- c("Ticker","Asset.Class", "Instrument","Start.Date","End.Date") Ticker_Input <- readline(prompt = "Please enter the Yahoo Finance Ticker: ") Asset.Class_Input <- readline(prompt = "Please enter the Asset Class of this asset: ") Instrument_Input <- readline(prompt = "Please enter the name of this asset: ") Start.Date_Input <- readline(prompt = "Please enter the start date of calculation in MMDDYYYY: ") End.Date_Input <- readline(prompt = "Please enter the end date of calculation in MMDDYYYY: ") Start.Date_Input <- as.Date(Start.Date_Input,format = "%m%d%Y") End.Date_Input <- as.Date(End.Date_Input,format = "%m%d%Y") Yahoo_Finance_Tickers_Input <- data.frame(Ticker_Input,Asset.Class_Input,Instrument_Input, Start.Date_Input,End.Date_Input) colnames(Yahoo_Finance_Tickers_Input) <- c("Ticker","Asset.Class", "Instrument","Start.Date","End.Date") for (i in (year(Start.Date_Input)+1):(year(End.Date_Input)-1)) { Yahoo_Finance_Tickers_Input2<- data.frame(Ticker_Input,Asset.Class_Input,Instrument_Input, as.Date(ISOdate(i,1,1),format = "%m%d%Y"), as.Date(ISOdate(i,12,31),format = "%m%d%Y")) colnames(Yahoo_Finance_Tickers_Input2) <- c("Ticker","Asset.Class", "Instrument","Start.Date","End.Date") Yahoo_Finance_Tickers_Input <- rbind(Yahoo_Finance_Tickers_Input, Yahoo_Finance_Tickers_Input2) } Yahoo_Finance_Tickers <- rbind(Yahoo_Finance_Tickers,Yahoo_Finance_Tickers_Input) } if (choice_ticker == 2) { # The Yahoo Finance tickers are read from an external file Yahoo_Finance_Tickers <- read.csv(file="Yahoo_Finance_Tickers_2023_01_08B.csv", header = TRUE, sep=";") # separators could be a comma } # Loop on the assets (Yahoo_Finance_Tickers) for (i in 1:nrow(Yahoo_Finance_Tickers)){ # ************************************************* # ********** Step 2: getting data input *********** # ************************************************* # Retrieve data from Yahoo Finance with the getSymbol function from the QuantMod R package data <- getSymbols(Yahoo_Finance_Tickers$Ticker[i], from = as.Date(Yahoo_Finance_Tickers$Start.Date[i],format = "%m/%d/%Y"), to = as.Date(Yahoo_Finance_Tickers$End.Date[i],format = "%m/%d/%Y"), env = NULL) data <- data[!is.na(data[,6]),] #remove days with NA values for price (if any) # DataFile (name of the data file used for the estimation) DataFile <- file_output3 # DataSource (origin of the data) DataSource <- "Yahoo Finance" # Enter the asset class and the instrument under study AssetClass = Yahoo_Finance_Tickers$Asset.Class[i] Instrument = Yahoo_Finance_Tickers$Instrument[i] # Compute logarithmic percentage returns (with respect to the daily closing prices) data$return = diff(log(data[,6]))*100 # CFL_2022_11_28: to be checked (concordance with the date) data$return[1]=0 # Convert the xts object (from the QuantMod R package) to a data frame and process dates data <- data.frame(Date=index(data), coredata(data)) data$Date <- as.POSIXct(data$Date) # Compute the number of observations, start date and end date of the time series Number_observations <- nrow(data) StartDate <- min(data$Date) EndDate <- max(data$Date) data <- cbind(Ticker = Yahoo_Finance_Tickers$Ticker[i], data) write.table(data, file_output3, sep = ",", append = T, col.names = !file.exists("myDF.csv"),row.names = F) # *************************************************** # ***** Step 3: computation of basic statistics ***** # *************************************************** # Basic statistics (unconditional statistics computed over the entire period from 'Start date' to 'End date' for each time-series) summary(data$return) mean1 <- mean(data$return) median1 <- median(data$return) sd1 <- sd(data$return) var1 <- sd1**2 kurt1 <- kurtosis(data$return) skew1 <- skewness(data$return) head(data$return, 6) # ************************************************************** # ***** Step 4: definition of parameters for risk measures ***** # ************************************************************** # Length of the time period to estimate conditional volatility EstimationPeriodConditionalVolatility <- 60 # in days # Probability to compute VaR and ES Probability_VaR <- 0.95 Probability_ES <- 0.95 # Probability to compute SV Probability_SV <- 0.99 # ************************************************ # ***** Step 5: computation of risk measures ***** # ************************************************ # Returns are used to study a short position and opposite returns for a long position data$opposite_return <- - data$return # Estimation of the Gaussian distribution Gaussian_fitting_right <- fitdistr(data$return,"normal") Gaussian_fitting_left <- fitdistr(data$opposite_return,"normal") # Estimation of the Generalized Pareto distribution (GPD) for the left and right tails of the distribution (use of the POT R package) # Threshold selection is a primordial, yet tricky subject to the correct and effective implementation of the POT-GPD framework. # A commonly used, whereas oversimplified and not necessarily asymptotically correct, approach is to select a high quantile of the date, say the 95% percentile value. # Otherwise, graphical selection for likelihood-based inference is often employed for determining the appropriate threshold. # Here, we adopt the simplest high-quantile approach for the purpose of easier automation. # Bearing in mind that this approach is by no means optimal, the user is high recommended to consider other ways of threshold selection for more robust results. GPD_fitting_right <- fitgpd(data$return, quantile(data$return,probs=Probability_VaR), est="mle") GPD_fitting_left <- fitgpd(data$opposite_return, quantile(data$opposite_return,probs=Probability_VaR), est="mle") summary(GPD_fitting_right,silent=FALSE) # Historical volatility (annualized standard deviation of returns computed over the entire period / from 'Start date' to 'End date' for each time-series) RM1_StartDate <- as.Date(min(data$Date), format = "%d/%m/%Y") RM1_EndDate <- as.Date(max(data$Date), format = "%d/%m/%Y") RM1 <- data %>% filter(Date>=RM1_StartDate & Date <= RM1_EndDate) RM1_NObs <- nrow(data) # CFL_2022_11_28: 252 (to be generalized as a parameter for each asset) RM1_HistoricalVol <- sd(data$return)*sqrt(252) # Annualized volatility # 3-month volatility (standard deviation of returns computed over a 3-month period / from 'End date - Estimation_period_Conditional_Volatility' to 'End date' for each time-series) # CFL_2022_11_28: 3 months or EstimationPeriodConditionalVolatility? RM2_StartDate <- as.Date(max(data$Date)) %m-% months(3) RM2_EndDate <- as.Date(max(data$Date)) RM2 <- data %>% filter(Date>=RM2_StartDate & Date <= RM2_EndDate) RM2_NObs <- nrow(RM2) RM2_3MVol <- sd(RM2$return)*sqrt(252) # Annualized volatility # Value at Risk (VaR) RM3_StartDate <- as.Date(min(data$Date)) RM3_EndDate <- as.Date(max(data$Date)) RM3 <- data %>% filter(Date>=RM3_StartDate & Date <= RM3_EndDate) RM3_NObs <- nrow(RM3) # Value at Risk (VaR) - Historical method RM3_VaR_historical_right <- quantile(RM3$return,probs=Probability_VaR) RM3_VaR_historical_left <- quantile(RM3$opposite_return,probs=Probability_VaR) # Value at Risk (VaR) - Parametric method - Normal distribution RM3_VaR_normal_left <- qnorm(Probability_VaR, mean = Gaussian_fitting_left$estimate[1], sd = Gaussian_fitting_left$estimate[2]) RM3_VaR_normal_right <- qnorm(Probability_VaR, mean = Gaussian_fitting_right$estimate[1], sd = Gaussian_fitting_right$estimate[2]) # Expected shortfall (ES) RM4_StartDate <- as.Date(min(data$Date)) RM4_EndDate <- as.Date(max(data$Date)) RM4 <- data %>% filter(Date>=RM4_StartDate & Date <= RM4_EndDate) RM4_NObs <- nrow(RM4) # Expected shortfall (ES) - Historical method RM4_ES_historical_right <- mean(RM4$return[which(RM4$return>=quantile(RM4$return,probs=Probability_ES))]) RM4_ES_historical_left <- mean(RM4$opposite_return[which(RM4$opposite_return>=quantile(RM4$opposite_return,probs=Probability_ES))]) # Expected shortfall (ES) - Parametric method - Normal distribution RM4_ES_normal_left <- cvar::ES(qnorm, x = 1- Probability_ES, dist.type = "qf", mean = Gaussian_fitting_left$estimate[1], sd = Gaussian_fitting_left$estimate[2]) RM4_ES_normal_right <- cvar::ES(qnorm, x = 1- Probability_ES, dist.type = "qf", mean = Gaussian_fitting_right$estimate[1], sd = Gaussian_fitting_right$estimate[2]) # Stress value (SV) - Parametric method - General Pareto distribution (GPD) RM5_StartDate <- as.Date(min(data$Date)) RM5_EndDate <- as.Date(max(data$Date)) RM5 <- data %>% filter(Date>=RM5_StartDate & Date <= RM5_EndDate) RM5_NObs <- nrow(RM5) RM5_SV_gpd_left <- qgpd(Probability_SV, scale = GPD_fitting_left$fitted.values[1], shape = GPD_fitting_left$fitted.values[2]) RM5_SV_gpd_right <- qgpd(Probability_SV, scale = GPD_fitting_right$fitted.values[1], shape = GPD_fitting_right$fitted.values[2]) # ********************************************** # ***** Step 6: production of output files ***** # ********************************************** # Output CSV file with the following column names: # DataFile (name of the data file used for the estimation) # DataSource (origin of the data) # AssetClass (Equity, Bond, Foreign exchange, Commodities, Crypto, etc.) # Instrument (S&P 500 index, CAC 40 index, etc.) # StartDate (start date of the time series) # EndDate (end date of the time series) # NumberObservations (number of observations of the time series) # Mean (unconditional mean computed from 'Start date' to 'End date' for each time series) # Standard deviation (unconditional standard deviation computed from 'Start date' to 'End date' for each time series) # Variance (unconditional variance computed from 'Start date' to 'End date' for each time series) # Skewness (unconditional skewness computed from 'Start date' to 'End date' for each time series) # Kurtosis (unconditional kurtosis computed from 'Start date' to 'End date' for each time series) # RiskMeasure1 (Historical volatility) # StartDateEstimationHistoricalVolatility (start date for the period of estimation of historical volatility) # EndDateEstimationHistoricalVolatility (end date for the period of estimation of historical volatility) # NumberObservationsEstimationHistoricalVolatility (number of observations in the period of estimation of historical volatility) # HistoricalVolatilityMeasure # RiskMeasure2 (3-month volatility) # StartDateEstimation3MonthVolatility (start date for the period of estimation of 3-month volatility) # EndDateEstimation3MonthVolatility (end date for the period of estimation of 3-month volatility) # NumberObservationsEstimation3MonthVolatility (number of observations in the period of estimation of 3-month volatility) # 3MonthVolatilityMeasure # RiskMeasure3 (Value at Risk) # StartDateEstimationValueAtRisk (start date for the period of estimation of value at risk) # EndDateEstimationValueAtRisk (end date for the period of estimation of value at risk) # NumberObservationsEstimationValueAtRisk (number of observations in the period of estimation of value at risk) # ValueAtRiskMeasure # RiskMeasure4 (Expected shortfall) # StartDateEstimationExpectedShortfall (start date for the period of estimation of expected shortfall) # EndDateEstimationExpectedShortfall (end date for the period of estimation of expected shortfall) # NumberObservationsEstimationExpectedShortfall (number of observations in the period of estimation of expected shortfall) # ExpectedShortfallMeasure # RiskMeasure5 (Stress value) # StartDateEstimationStressValue (start date for the period of estimation of stress value) # EndDateEstimationv (end date for the period of estimation of stress value) # NumberObservationsEstimationv (number of observations in the period of estimation of stress value) # StressValueMeasure # RFileName (name of the R program file used for the estimation of risk measures) # Add the new result to the data frame that stores all risk measures new_entry <- data.frame(DataFile, DataSource, AssetClass, Instrument, mean1, median1, sd1, var1, skew1, kurt1, RM1_StartDate, RM1_EndDate, RM1_NObs, RM1_HistoricalVol, RM2_StartDate, RM2_EndDate, RM2_NObs, RM2_3MVol, RM3_StartDate, RM3_EndDate, RM3_NObs, RM3_VaR_historical_left, RM3_VaR_historical_right, RM3_VaR_normal_left, RM3_VaR_normal_right, RM4_StartDate, RM4_EndDate, RM4_NObs, RM4_ES_historical_left, RM4_ES_historical_right, RM4_ES_normal_left, RM4_ES_normal_right, RM5_StartDate, RM5_EndDate, RM5_NObs, RM5_SV_gpd_left, RM5_SV_gpd_right) risk_measures <- rbind(risk_measures,new_entry) } # End of the loop 'i in 1:nrow(Yahoo_Finance_Tickers' on the assets (Yahoo_Finance_Tickers) # Add the results of risk measures to the output file # CFL_2022_11_28: I don't understand the following line of code / myDF.csv? write.table(risk_measures, file_output2, sep = ",", append = T, col.names = !file.exists("myDF.csv"),row.names = F) write("", file=file_output2, append = TRUE) # *** Information for the execution file *** # *** Compute the duration of the program # Storing of the ending time of the program ending_time = Sys.time() a <- paste("Ending time of the program:", ending_time) write(a, file=file_output1, append = TRUE) write("", file=file_output1, append = TRUE) # Computation of the duration of the program diff_time = ending_time - starting_time a <- paste("Duration of the program:", format(diff_time, digits=2, nsmall=2)) write(a, file=file_output1, append = TRUE) write("", file=file_output1, append = TRUE) ########################## ### End of the program ### ########################## # References : # Yahoo Finance: https://finance.yahoo.com # https://fr.finance.yahoo.com/quote/%5EGSPC?p=^GSPC # https://www.rdocumentation.org/packages/quantmod/versions/0.4.20/topics/getSymbols.yahoo