/************************************************************************************
* Stock Price Crash Risk
* US market
* Author: Xiaoqin(Alex) Wei
************************************************************************************/
%let wrds=wrds-cloud.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;
rsubmit;
%let fyear1 = 1950;
%let fyear2 = 2022;
%let year_start = 01JAN1950;
%let year_end = 31DEC2022;
/************************************************************************************
* STEP ONE: Extract Compustat data;
************************************************************************************/
/* import Compustat data to receive end date of fiscal year */
data comp_data;
set comp.funda;
where fyear between &fyear1 and &fyear2;
if datafmt='STD' and popsrc='D' and consol='C';
/*create begin and end dates for fiscal year*/
endfyr = datadate; /* end date of fiscal year */
begfyr = intnx('month',endfyr,-11,'beg'); /* start date of fiscal year, pushback for 1 year */
format endfyr begfyr yymmdd10.;
keep gvkey cusip endfyr begfyr fyear;
run;
/*proc print data=comp_data(obs=10);*/
/*run;*/
/*******************************************************************************************
* STEP TWO: Link GVKEYS to CRSP Identifiers; *
* Use CCMXPF_LNKHIST table to obtain CRSP identifiers for our subset of companies/dates; *
********************************************************************************************/
proc sql;
create table comp_data_with_permno as select *
from comp_data as a, crsp.ccmxpf_lnkhist as b
where a.gvkey = b.gvkey and
b.LINKTYPE in ("LU","LC","LD","LN","LS","LX") and
(b.LINKDT <= a.endfyr or b.LINKDT = .B) and (a.endfyr <= b.LINKENDDT or b.LINKENDDT = .E);
/******************************************************************************************
* The previous condition requires the end of fiscal year to fall within the link range. *
* *
* A more relaxed condition would require any part of the fiscal year to be within the *
* link range: *
* (b.LINKDT <= a.endfyr or missing(b.LINKDT) = 1) and (b.LINKENDDT >= a.begfyr or *
* missing(b.LINKENDDT)= 1); *
* or a more strict condition would require the entire fiscal year to be within *
* the link range : *
* (b.LINKDT <= a.begfyr or missing(b.LINKDT) = 1) and *
* (a.endfyr <= b.LINKENDDT or b.LINKENDDT= .E); *
* *
* If these conditions are used, we suggest using the result data set from the *
* "collapsing" procedure - *
* which is shown in sample program ccm_lnktable.sas - to replace crsp.ccmxpf_linktable. *
******************************************************************************************/
quit;
/*proc print data=&syslast(obs=50);*/
/* by gvkey;*/
/* id fyear;*/
/*run;*/
/***************************************************************************************
* STEP THREE: Add CRSP daily stock and market price data;
***************************************************************************************/
/* import daily CRSP stock date */
data daily_stock_data;
set crsp.dsf (keep= permno date ret);
where ret is not missing and date between "&year_start."d and "&year_end."d;
format date yymmdd10.;
run;
/*proc print data=daily_stock_data(obs=10);*/
/*run;*/
/* merge daily CRSP stock date to CCM*/
proc sql;
create table merged_data_crsp_CCM as
select a.gvkey, a.cusip, a.endfyr, a.begfyr, a.fyear, b.date, b.ret, b.permno
from comp_data_with_permno as a
left join daily_stock_data as b
on a.lpermno = b.permno
where b.date between a.begfyr and a.endfyr; /*this critera make sure only date between begin and end of fiscal year kept*/
quit;
/*proc print data=merged_data(obs=50);*/
/*run;*/
/* extract daily market return */
data daily_market_return;
set crsp.dsi (keep=date vwretx);
where vwretx is not missing and date between "&year_start."d and "&year_end."d;
run;
/* merge daily CRSP stock date with CCM to market return*/
proc sql;
create table merged_data_crsp_CCM_Mkt as
select a.gvkey, a.cusip, a.endfyr, a.begfyr, a.fyear, b.date, a.ret, a.permno, b.vwretx
from merged_data_crsp_CCM as a
left join daily_market_return as b
on a.date = b.date
where b.date between a.begfyr and a.endfyr;
quit;
proc print data=merged_data_crsp_CCM_Mkt(obs=50);
run;
/***************************************************************************************
* STEP FOUR: clean and reformat dataset;
***************************************************************************************/
/*count week and drop if number of weeks < 30*/
data merged_data_with_weeks;
set merged_data_crsp_CCM_Mkt;
weeks_between = intck('week', begfyr, endfyr);
run;
data merged_data_cleaned;
set merged_data_with_weeks; /* dataset with number of weeks */
if weeks_between >= 30;
run;
/***************************************************************************************
* STEP FIVE: calculate weekly return;
***************************************************************************************/
proc sort data=merged_data_cleaned;
by permno fyear date;
run;
data weekly_returns;
set merged_data_cleaned;
by permno fyear date;
retain weekly_prod_ret 1 market_weekly_prod_ret 1 week_number 0 last_week;
/* Check new fiscal year and new firm */
if first.fyear or first.permno then do;
week_number = 1;
last_week = week(date);
weekly_prod_ret = 1;
market_weekly_prod_ret = 1;
end;
/* Cumulate weekly return for firm and market */
weekly_prod_ret = weekly_prod_ret * (1 + ret);
market_weekly_prod_ret = market_weekly_prod_ret * (1 + vwretx);
/* Check if weekends or end of fiscal year */
if last.fyear or week(date) ne last_week then do;
weekly_return = weekly_prod_ret - 1;
market_weekly_return = market_weekly_prod_ret - 1;
output;
weekly_prod_ret = 1; /* Reset return for firm */
market_weekly_prod_ret = 1; /* Reset return for market */
last_week = week(date);
week_number = week_number + 1;
end;
run;
/*proc print data=weekly_returns_with_fyear(obs=50);*/
/*run;*/
/***************************************************************************************
* STEP SIX: calculate lead and lag weekly returns of market;
***************************************************************************************/
/*calculate lead1 lead2*/
proc sort data=weekly_returns out=temp_reverse;
by descending permno descending fyear descending date;
run;
data temp_lead;
set temp_reverse;
by descending permno descending fyear;
if first.fyear or first.permno then do;
call missing(Lead1_Rm, Lead2_Rm);
end;
Lead1_Rm = lag1(market_weekly_return); /* next week return */
Lead2_Rm = lag2(market_weekly_return); /* next next week return */
if first.fyear or first.permno then do;
Lead1_Rm = .;
Lead2_Rm = .;
end;
run;
proc sort data=temp_lead out=temp_lead_sorted;
by permno fyear date;
run;
/*calculate lag1 lag2 lead1 lead2 (add lag1 lag2 from lead1 lead2)*/
data pre_dataset1;
merge weekly_returns (in=a)
temp_lead_sorted (in=b keep=permno fyear date Lead1_Rm Lead2_Rm);
by permno fyear date;
/* reset calculation of lagged value */
if first.permno or first.fyear then do;
call missing(L_Rm, L2_Rm);
end;
L_Rm = lag1(market_weekly_return); /* lag1 */
L2_Rm = lag2(market_weekly_return); /* lag2 */
/* stop lag cross permno or fyear */
if first.permno or first.fyear then do;
L_Rm = .;
L2_Rm = .;
end;
run;
/*proc print data=pre_dataset1(obs=500);*/
/*run;*/
/***************************************************************************************
* STEP SEVEN: calculate resdiual;
***************************************************************************************/
/* calculate resdiual */
proc reg data=pre_dataset1 noprint;
by permno fyear;
model weekly_return = Lead2_Rm Lead1_Rm market_weekly_return L_Rm L2_Rm;
output out=res_residuals p=_residuals;
run;
data residuals;
set res_residuals;
W = log(1 + _residuals);
W3 = W**3;
W2 = W**2;
run;
/***************************************************************************************
* STEP EIGHT: calculate crash risk NCSKEW;
***************************************************************************************/
/* calculate NCSKEW */
proc means data=residuals noprint;
by permno fyear;
var W3 W2;
output out=ncskew_data sum(W3)=W3_sum sum(W2)=W2_sum;
run;
data ncskew_data;
set ncskew_data;
NCSKEW = -1 * (_freq_ * (_freq_ - 1)**1.5 * W3_sum) / ((_freq_ - 1) * (_freq_ - 2) * W2_sum**1.5);
run;
/***************************************************************************************
* STEP NINE: calculate crash risk DUVOL;
***************************************************************************************/
/* calculate DUVOL */
data data residuals2;
set res_residuals;
W = _residuals;
run;
proc sql;
create table work.residuals3 as
select a.*,
mean(W) as W_mean
from residuals2 as a
group by permno, fyear;
quit;
data duvol_data;
set residuals3;
by permno fyear;
retain W_mean n_u n_d W2_sum_Up W2_sum_Down;
/* Initialize variables at the start of each year */
if first.fyear then do;
W_mean = 0;
n_u = 0;
n_d = 0;
W2_sum_Up = 0;
W2_sum_Down = 0;
end;
/* Update counts and sums based on current observation */
if W > W_mean then do;
n_u + 1;
W2_sum_Up + W**2;
end;
else if W < W_mean then do;
n_d + 1;
W2_sum_Down + W**2;
end;
/* Compute DUVOL and other metrics at the end of each year */
if last.fyear then do;
W_mean = W_mean / _N_;
DUVOL = log((((n_u - 1) * W2_sum_Down) / ((n_d - 1) * W2_sum_Up)));
output;
end;
keep permno fyear W_mean n_u n_d W2_sum_Up W2_sum_Down DUVOL;
/***************************************************************************************
* STEP TEN: calculate crash risk Crash;
***************************************************************************************/
/* calculate Crash */
/* calculate std and mean */
proc means data=pre_dataset1 noprint;
by permno fyear;
var weekly_return;
output out=summary_dataset (drop=_TYPE_ _FREQ_)
mean=week_return_mean
std=week_return_sd;
run;
/* merge dataset */
data merged_with_summary;
merge pre_dataset1 summary_dataset;
by permno fyear;
run;
/* detect crash events and count crash */
data crash_detection;
set merged_with_summary;
by permno fyear;
retain count 0 crash 0; /* initialize crash and count */
/* is_crash */
if weekly_return <= (week_return_mean - 3.09 * week_return_sd) then do;
is_crash = 1;
count + 1; /* is_crash=1 count */
crash = 1; /* label crash */
end;
else is_crash = 0;
/* last fiscal year */
if last.fyear then do;
output;
count = 0; /* reset */
crash = 0; /* reset */
end;
run;
/***************************************************************************************
* STEP ELEVEN: Merge and output;
***************************************************************************************/
proc sort data=ncskew_data; by permno fyear; run;
proc sort data=duvol_data; by permno fyear; run;
proc sort data=crash_detection; by permno fyear; run;
/* merge dataset */
data crash_risk_final;
merge pre_dataset1 ncskew_data duvol_data crash_detection;
by permno fyear;
run;
proc sort data=crash_risk_final out=crash_risk_final1 nodupkey;
by gvkey fyear; /* duplicates gvkey fyear */
run;
proc print data=crash_risk_final1 (obs=50);
run;
/* export to CSV */
proc export data=crash_risk_final1
outfile="crash_risk_final.csv"
dbms=csv
replace;
run;
endrsubmit;
signoff;
Reference:
Amy P. Hutton, Alan J. Marcus, and Hassan Tehranian. (2009) Opaque financial reports, R2, and crash risk. Journal of Financial Economics, 94(1):67–86, October 2009. ISSN 0304405X. doi: 10.1016/j.jfineco.2008.10.003. URL: https://www.sciencedirect.com/science/article/pii/S0304405X09000993.