Stock Price Crash Risk

Posted by:

|

On:

|

/************************************************************************************
* Stock Price Crash Risk
* US market                                                     
* Author: Xiaoqin(Alex) Wei
************************************************************************************/

%let wrds=wrds-cloud.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;

rsubmit;


%let fyear1 = 1950;
%let fyear2 = 2022;

%let year_start = 01JAN1950;
%let year_end   = 31DEC2022;

/************************************************************************************
* STEP ONE: Extract Compustat data;
************************************************************************************/

/* import Compustat data to receive end date of fiscal year */
data comp_data;
    set comp.funda;
    where fyear between &fyear1 and &fyear2;
    if datafmt='STD' and popsrc='D' and consol='C';
	/*create begin and end dates for fiscal year*/
    endfyr = datadate; /* end date of fiscal year */
    begfyr = intnx('month',endfyr,-11,'beg'); /* start date of fiscal year, pushback for 1 year */
	format endfyr begfyr yymmdd10.;
    keep gvkey cusip endfyr begfyr fyear;
run;

/*proc print data=comp_data(obs=10);*/
/*run;*/

/*******************************************************************************************
* STEP TWO: Link GVKEYS to CRSP Identifiers;                                               *
* Use CCMXPF_LNKHIST table to obtain CRSP identifiers for our subset of companies/dates;   *
********************************************************************************************/

proc sql;
  create table comp_data_with_permno as select *
  from comp_data as a, crsp.ccmxpf_lnkhist as b
  where a.gvkey = b.gvkey and
  b.LINKTYPE in ("LU","LC","LD","LN","LS","LX") and
  (b.LINKDT <= a.endfyr or b.LINKDT = .B) and (a.endfyr <= b.LINKENDDT or b.LINKENDDT = .E);               
 /******************************************************************************************
  * The previous condition requires the end of fiscal year to fall within the link range.  *
  *                                                                                        *
  * A more relaxed condition would require any part of the fiscal year to be within the    *
  * link range:                                                                            *
  * (b.LINKDT <= a.endfyr or missing(b.LINKDT) = 1) and (b.LINKENDDT >= a.begfyr or        *
  * missing(b.LINKENDDT)= 1);                                                              *
  * or a more strict condition would require the entire fiscal year to be within           *
  * the link range :                                                                       *
  * (b.LINKDT <= a.begfyr or missing(b.LINKDT) = 1) and                                    *
  * (a.endfyr <= b.LINKENDDT or b.LINKENDDT= .E);                                          *
  *                                                                                        *
  * If these conditions are used, we suggest using the result data set from the            *
  * "collapsing" procedure -                                                               *
  * which is shown in sample program ccm_lnktable.sas - to replace crsp.ccmxpf_linktable.  *
  ******************************************************************************************/
	
quit;
 
/*proc print data=&syslast(obs=50);*/
/*  by gvkey;*/
/*  id fyear;*/
/*run;*/


/***************************************************************************************
* STEP THREE: Add CRSP daily stock and market price data;
***************************************************************************************/

/* import daily CRSP stock date */
data daily_stock_data;
    set crsp.dsf (keep= permno date ret);
    where ret is not missing and date between "&year_start."d and "&year_end."d;
    format date yymmdd10.;
run;


/*proc print data=daily_stock_data(obs=10);*/
/*run;*/

/* merge daily CRSP stock date to CCM*/
proc sql;
    create table merged_data_crsp_CCM as
    select a.gvkey, a.cusip, a.endfyr, a.begfyr, a.fyear, b.date, b.ret, b.permno
    from comp_data_with_permno as a
    left join daily_stock_data as b
    on a.lpermno = b.permno
    where b.date between a.begfyr and a.endfyr; /*this critera make sure only date between begin and end of fiscal year kept*/
quit;

/*proc print data=merged_data(obs=50);*/
/*run;*/

/* extract daily market return */
data daily_market_return;
    set crsp.dsi (keep=date vwretx);
    where vwretx is not missing and date between "&year_start."d and "&year_end."d;
run;

/* merge daily CRSP stock date with CCM to market return*/
proc sql;
    create table merged_data_crsp_CCM_Mkt as
    select a.gvkey, a.cusip, a.endfyr, a.begfyr, a.fyear, b.date, a.ret, a.permno, b.vwretx
    from merged_data_crsp_CCM as a
    left join daily_market_return as b
    on a.date = b.date
    where b.date between a.begfyr and a.endfyr;
quit;

proc print data=merged_data_crsp_CCM_Mkt(obs=50);
run;

/***************************************************************************************
* STEP FOUR: clean and reformat dataset;
***************************************************************************************/

/*count week and drop if number of weeks < 30*/

data merged_data_with_weeks;
    set merged_data_crsp_CCM_Mkt;
    weeks_between = intck('week', begfyr, endfyr);

run;

data merged_data_cleaned;
    set merged_data_with_weeks; /* dataset with number of weeks */
    if weeks_between >= 30;
run;


/***************************************************************************************
* STEP FIVE: calculate weekly return;
***************************************************************************************/

proc sort data=merged_data_cleaned;
    by permno fyear date;
run;

data weekly_returns;
    set merged_data_cleaned;
    by permno fyear date;

    retain weekly_prod_ret 1 market_weekly_prod_ret 1 week_number 0 last_week;

    /* Check new fiscal year and new firm */
    if first.fyear or first.permno then do;
        week_number = 1;
        last_week = week(date);
        weekly_prod_ret = 1;
        market_weekly_prod_ret = 1;
    end;

    /* Cumulate weekly return for firm and market */
    weekly_prod_ret = weekly_prod_ret * (1 + ret);
    market_weekly_prod_ret = market_weekly_prod_ret * (1 + vwretx);

    /* Check if weekends or end of fiscal year */
    if last.fyear or week(date) ne last_week then do;
        weekly_return = weekly_prod_ret - 1;
        market_weekly_return = market_weekly_prod_ret - 1;
        output;
        weekly_prod_ret = 1;  /* Reset return for firm */
        market_weekly_prod_ret = 1;  /* Reset return for market */
        last_week = week(date);
        week_number = week_number + 1;
    end;
run;

/*proc print data=weekly_returns_with_fyear(obs=50);*/
/*run;*/

/***************************************************************************************
* STEP SIX: calculate lead and lag weekly returns of market;
***************************************************************************************/

/*calculate  lead1 lead2*/
proc sort data=weekly_returns out=temp_reverse;
    by descending permno descending fyear descending date;
run;

data temp_lead;
    set temp_reverse;
    by descending permno descending fyear;

    if first.fyear or first.permno then do;
        call missing(Lead1_Rm, Lead2_Rm);
    end;

    Lead1_Rm = lag1(market_weekly_return); /* next week return */
    Lead2_Rm = lag2(market_weekly_return); /* next next week return */

    if first.fyear or first.permno then do;
        Lead1_Rm = .;
        Lead2_Rm = .;
    end;
run;

proc sort data=temp_lead out=temp_lead_sorted;
    by permno fyear date;
run;

/*calculate lag1 lag2 lead1 lead2 (add lag1 lag2 from lead1 lead2)*/
data pre_dataset1;
    merge weekly_returns (in=a)
          temp_lead_sorted (in=b keep=permno fyear date Lead1_Rm Lead2_Rm);
    by permno fyear date;
    /* reset calculation of lagged value */
    if first.permno or first.fyear then do;
        call missing(L_Rm, L2_Rm);
    end;

    L_Rm = lag1(market_weekly_return); /* lag1 */
    L2_Rm = lag2(market_weekly_return); /* lag2 */

    /* stop lag cross permno or fyear */
    if first.permno or first.fyear then do;
        L_Rm = .;
        L2_Rm = .;
    end;
run;

/*proc print data=pre_dataset1(obs=500);*/
/*run;*/


/***************************************************************************************
* STEP SEVEN: calculate resdiual;
***************************************************************************************/
/* calculate resdiual */
proc reg data=pre_dataset1 noprint;
    by permno fyear;
    model weekly_return = Lead2_Rm Lead1_Rm market_weekly_return L_Rm L2_Rm;
    output out=res_residuals p=_residuals; 
run;

data residuals;
    set res_residuals;
    W = log(1 + _residuals);
    W3 = W**3;
    W2 = W**2;
run;

/***************************************************************************************
* STEP EIGHT: calculate crash risk NCSKEW;
***************************************************************************************/

/* calculate NCSKEW */
proc means data=residuals noprint;
    by permno fyear;
    var W3 W2;
    output out=ncskew_data sum(W3)=W3_sum sum(W2)=W2_sum;
run;

data ncskew_data;
    set ncskew_data;
    NCSKEW = -1 * (_freq_ * (_freq_ - 1)**1.5 * W3_sum) / ((_freq_ - 1) * (_freq_ - 2) * W2_sum**1.5);
run;

/***************************************************************************************
* STEP NINE: calculate crash risk DUVOL;
***************************************************************************************/

/* calculate DUVOL */
data data residuals2;
    set res_residuals;
    W =  _residuals;
run;

proc sql;
    create table work.residuals3 as
    select a.*,
           mean(W) as W_mean
    from residuals2 as a
    group by permno, fyear;
quit;

data duvol_data;
    set residuals3;
    by permno fyear;
    retain W_mean n_u n_d W2_sum_Up W2_sum_Down;

    /* Initialize variables at the start of each year */
    if first.fyear then do;
        W_mean = 0;
        n_u = 0;
        n_d = 0;
        W2_sum_Up = 0;
        W2_sum_Down = 0;
    end;
    /* Update counts and sums based on current observation */
    if W > W_mean then do;
        n_u + 1;
        W2_sum_Up + W**2;
    end;
    else if W < W_mean then do;
        n_d + 1;
        W2_sum_Down + W**2;
    end;
    /* Compute DUVOL and other metrics at the end of each year */
    if last.fyear then do;
        W_mean = W_mean / _N_;
        DUVOL = log((((n_u - 1) * W2_sum_Down) / ((n_d - 1) * W2_sum_Up)));
        output;
    end;
    keep permno fyear W_mean n_u n_d W2_sum_Up W2_sum_Down DUVOL;

/***************************************************************************************
* STEP TEN: calculate crash risk Crash;
***************************************************************************************/

/* calculate Crash */
/* calculate std and mean */
proc means data=pre_dataset1 noprint;
    by permno fyear;
    var weekly_return;
    output out=summary_dataset (drop=_TYPE_ _FREQ_) 
           mean=week_return_mean 
           std=week_return_sd;
run;

/* merge dataset */
data merged_with_summary;
    merge pre_dataset1 summary_dataset;
    by permno fyear;
run;

/* detect crash events and count crash */
data crash_detection;
    set merged_with_summary;
    by permno fyear;

    retain count 0 crash 0; /* initialize crash and count */

    /* is_crash */
    if weekly_return <= (week_return_mean - 3.09 * week_return_sd) then do;
        is_crash = 1;
        count + 1; /* is_crash=1 count */
        crash = 1; /* label crash */
    end;
    else is_crash = 0;

    /* last fiscal year */
    if last.fyear then do;
        output;
        count = 0; /* reset */
        crash = 0; /* reset */
    end;
run;


/***************************************************************************************
* STEP ELEVEN: Merge and output;
***************************************************************************************/

proc sort data=ncskew_data; by permno fyear; run;
proc sort data=duvol_data; by permno fyear; run;
proc sort data=crash_detection; by permno fyear; run;

/* merge dataset */
data crash_risk_final;
    merge pre_dataset1 ncskew_data duvol_data crash_detection;
    by permno fyear;
run;

proc sort data=crash_risk_final out=crash_risk_final1 nodupkey;
    by gvkey fyear; /* duplicates gvkey fyear */
run;

proc print data=crash_risk_final1 (obs=50);
run;

/* export to CSV */
proc export data=crash_risk_final1
    outfile="crash_risk_final.csv"
    dbms=csv
    replace;
run;


endrsubmit;
signoff;

Reference:

Amy P. Hutton, Alan J. Marcus, and Hassan Tehranian. (2009) Opaque financial reports, R2, and crash risk. Journal of Financial Economics, 94(1):67–86, October 2009. ISSN 0304405X. doi: 10.1016/j.jfineco.2008.10.003. URL: https://www.sciencedirect.com/science/article/pii/S0304405X09000993.

Posted by

in