/* Use Jackknife method to compute discretionary accruals */
/* see https://mingze-gao.com/posts/compute-jackknife-coefficient-estimates-in-sas/ */
/* UseHribarCollinsTotalAccruals:
– true: use Hribar-Collins Cashflow Total Accruals
– false: use normal method */
/* Connection setup for the remote WRDS session.
   The macro variable wrds holds the host name and port; the REMOTE= option
   below names that macro variable as the session to connect to. */
%let wrds=wrds-cloud.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
/* String delimiters must be plain ASCII quotes (typographic quotes are not
   valid SAS quoting).
   NOTE(review): the password is an empty string here - presumably redacted;
   consider password=_prompt_ rather than storing it in the file. */
signon username=weixiaoqin password="";
/* Library in the local session that receives the downloaded results */
libname local 'C:\Users\Xiaoqin(Patrick) Wei\Desktop\Chapter2 data\ACCM-modified Jones model';
/* Everything from here to endrsubmit is submitted to the remote session */
rsubmit;
/* Switch for the total-accruals definition used when tac is computed:
   "false" selects the balance-sheet accruals ratio, any other value selects
   the Hribar-Collins cash-flow total accruals. */
%let UseHribarCollinsTotalAccruals = false;
/* Include %array and %do_over.
   The URLs must be wrapped in plain ASCII double quotes. */
filename do_over url "https://mingze-gao.com/utils/do_over.sas";
filename array url "https://mingze-gao.com/utils/array.sas";
%include do_over array;
/* Winsorize macro */
filename winsor url "https://mingze-gao.com/utils/winsor.sas";
%include winsor;
/*
Earnings management models
Modified based on the work by Joost Impink, March 2016
Models estimated (Note that the intercept a0 is removed in the modified code below):
– Jones model, tac = a0 + a1*(1/TAt-1) + a2*chSales + a3*PPE + a4*ROA + error.
– variable names DA_Jones
– Modified Jones model, as Jones model, but using chSales – chREC to compute fitted values.
– variable names DA_mJones
– Kothari 2005, controlling for ROA, tac = a0 + a1*(1/TAt-1) + a2*(chSales - chREC) + a3*PPE + a4*ROA + error.
– variable names DA_Kothari
– Kothari 2005, performance matched, Jones model, difference in discretionary accruals between firm and closest firm in terms of (contemporaneous) roa
– variable names DA_pmKothari_Jones
– Kothari 2005, performance matched, modified Jones model, difference in discretionary accruals between firm and closest firm in terms of (contemporaneous) roa
– variable names DA_pmKothari_mJones
tac: Total accruals, computed as net profit after tax before extraordinary items less cash flows from operations
1/TAt-1: Inverse of beginning of year total assets
chSales: Change in net sales revenue
chREC: Change in net receivables
PPE: Gross property, plant, and equipment
ROA: Return on assets.
Variables used Compustat Funda
AT: Total assets
IB: Income Before Extraordinary Items
IBC: Income Before Extraordinary Items (Cash Flow) (used if IB is missing)
OANCF: Operating Activities – Net Cash Flow
PPEGT: Property, Plant and Equipment – Total (Gross)
RECT: Receivables – Total
SALE: Sales
INVT: Inventories – Total
LCO: Current Liabilities Other Total
DP: Depreciation and Amortization
ACO: Current Assets Other Total
AP: Accounts Payable – Trade
*/
/* Get Funda variables */
%let fundaVars = at ib ibc oancf ppegt rect sale xidoc lco dp aco invt ap;
/* Build work.a_funda: one row per firm-year from comp.funda, restricted to
   fiscal years 1980-2023, the standard record filter, and positive sales and
   total assets. Each subsetting IF drops rows that fail its condition. */
data work.a_funda(keep=key gvkey fyear datadate sich &fundaVars);
  set comp.funda;
  if 1980 <= fyear <= 2023;
  /* Generic filter */
  if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C';
  /* Firm-year identifier (gvkey concatenated with fyear); used later as the
     ID variable in PROC REG and as the join key */
  key = gvkey || fyear;
  /* Keep if sale > 0, at > 0 */
  if sale > 0 and at > 0;
  /* Use Income Before Extraordinary Items (Cash Flow) if ib is missing */
  if ib =. then ib=ibc;
run;
/* Lagged values for: at sale rect invt aco ap lco */
%let lagVars = at sale rect invt aco ap lco;
/* Self join to get lagged values at_l, sale_l, rect_l, ...
   Each row of work.a_funda (alias a) is paired with the same gvkey's row for
   the previous fiscal year (alias b); %do_over expands to
   "b.at as at_l, b.sale as sale_l, ..." for every name in lagVars.
   This is an inner join, so firm-years without a prior-year row are dropped. */
proc sql;
  create table work.b_funda as
  select a.*, %do_over(values=&lagVars, between=comma, phrase=b.? as ?_l)
  from work.a_funda a, work.a_funda b
  where a.gvkey = b.gvkey and a.fyear-1 = b.fyear;
quit;
/* Construct additional variables.
   Rewrites work.b_funda in place, adding the industry code and the regression
   variables; every ratio is scaled by lagged total assets (at_l). */
data work.b_funda(compress=yes);
  set work.b_funda;
  /* 2-digit SIC */
  sic2 = int(sich/100);
  /* variables */
  if "&UseHribarCollinsTotalAccruals." eq "false" then
    /* Accruals ratio */
    tac = ((rect-rect_l)+(invt-invt_l)+(aco-aco_l)-(ap-ap_l)-(lco-lco_l)-dp)/at_l;
  else
    /* Hribar Collins total cash flow accruals */
    tac = (ibc - oancf + xidoc)/at_l;
  inv_at_l = 1 / at_l;
  drev = (sale - sale_l) / at_l;
  drevadj = (sale - sale_l)/at_l - (rect - rect_l)/at_l;
  ppe = ppegt / at_l;
  roa = ib / at_l;
  /* these variables may not be missing (cmiss counts missing variables).
     NOTE(review): the filter below is disabled by its leading asterisk, so
     rows with missing values are kept - confirm that this is intended. */
  *if cmiss (of tac inv_at_l drevadj ppe roa) eq 0;
run;
/* Optional step ahead of the industry-year regressions: pass the regression
   variables through %winsor, by fiscal year, with pctl=1 99.
   Input is work.b_funda; output is work.b_funda_wins. */
%let winsVars = tac inv_at_l drev drevadj ppe roa ;
%winsor(
  dsetin=work.b_funda,
  dsetout=work.b_funda_wins,
  byvar=fyear,
  vars=&winsVars,
  type=winsor,
  pctl=1 99
);
/* Regression by industry-year
   edf (error degrees of freedom) + #params will equal the number of obs
   (no need for proc univariate to count) */
proc sort data=work.b_funda_wins; by fyear sic2; run;
/* regressors: defines the macro array "vars" that later %do_over(vars, ...)
   calls iterate over */
%array(vars, values=inv_at_l drev ppe drevadj roa);
/* Suppress listing output while the by-group regressions run */
ods listing close;
proc reg data=work.b_funda_wins edf outest=work.c_parms;
  by fyear sic2;
  /* key identifies each firm-year in the per-observation output */
  id key;
  /* Jones Model (no intercept; influence statistics and inv(X`X) requested) */
  Jones: model tac = inv_at_l drev ppe / noint influence i;
  /* Kothari with ROA in model */
  Kothari: model tac = inv_at_l drevadj ppe roa / noint influence i;
  /* Capture the influence statistics and the inverse cross-product matrix
     as datasets for the jackknife computation */
  ods output OutputStatistics=work.outstats InvXPX=work.xpxinv;
run;
ods listing;
/* Compute discretionary accrual measures.
   One PROC SQL step that (1) derives leave-one-out (jackknifed) coefficient
   estimates for every firm-year from the PROC REG influence output,
   (2) turns them into discretionary accruals, and (3) performance-matches
   each firm-year to the same-industry-year firm closest in ROA. */
proc sql;
  /* Compute firm-year Jackknifed coefficient estimates */
  create table work.xpxinv2 as
  /* Extract the diagonal elements of the symmetric inverse cross-product
     matrix for each industry-year and model: the inner query keeps, for the
     row of each regressor, the entry in that regressor's own column; the
     outer query collapses them to one row per fyear, sic2, model. */
  select fyear, sic2, model,
    %do_over(vars, phrase=sum(case when variable="?" then xpxinv else . end) as ?, between=comma)
  from (select fyear, sic2, model, variable,
          case %do_over(vars, phrase=when variable="?" then ?) else . end as xpxinv
        from work.xpxinv
        where variable ~= 'tac')
  group by fyear, sic2, model
  order by fyear, sic2, model;

  /* The difference between original coefficient estimates and the Jackknifed
     estimates. Residual/(RStudent*sqrt(1-HatDiagonal)) recovers the
     leave-one-out root MSE; multiplying by DFB_? and by the square root of
     the matching diagonal element gives the change in that coefficient. */
  create table work.bias as
  select a.fyear, a.sic2, a.model, a.key,
    %do_over(vars, phrase=a.DFB_?*(a.Residual/(a.RStudent*sqrt(1-a.HatDiagonal)))*sqrt(b.?) as bias_?, between=comma)
  from work.outstats as a left join work.xpxinv2 as b
    on a.fyear=b.fyear and a.sic2=b.sic2 and a.model=b.model
  order by a.fyear, a.sic2, a.model, a.key;

  /* Compute Jackknifed coefficient estimates by subtracting the bias from the
     original estimates. _MODEL_ and _EDF_ are the model label and error
     degrees of freedom stored in the OUTEST= dataset work.c_parms. */
  create table work.Jackknifed_params as
  select a.fyear, a.sic2, a.model, a.key,
    %do_over(vars, phrase=b.? - a.bias_? as ?, between=comma), b._EDF_
  from work.bias as a left join work.c_parms as b
    on a.fyear=b.fyear and a.sic2=b.sic2 and a.model=b._MODEL_
  order by a.fyear, a.sic2, a.model, a.key;

  /* Compute discretionary accruals: actual tac minus the value fitted with
     the firm-year's own jackknifed coefficients. Each key has one row per
     model in Jackknifed_params; the sum(case ...) picks out the right one. */
  create table work.tmp as
  select distinct a.fyear, a.sic2, a.gvkey, a.key,
    /* Jones model at a minimum 8 obs (5 degrees of freedom + 3 params) */
    sum(case when b.model eq 'Jones' and b._EDF_ ge 5 then
      a.tac - (%do_over(values=inv_at_l drev ppe, between=%str(+), phrase=a.? * b.?)) else . end) as DA_Jones,
    /* Modified Jones model: drev is used in first model, but drevadj is used to compute fitted value */
    sum(case when b.model eq 'Jones' and b._EDF_ ge 5 then
      a.tac - (a.drevadj * b.drev + %do_over(values=inv_at_l ppe, between=%str(+), phrase=a.? * b.?)) else . end) as DA_mJones,
    /* Kothari model (with ROA in regression) at a minimum 8 obs (4 degrees of freedom + 4 params) */
    sum(case when b.model eq 'Kothari' and b._EDF_ ge 4 then
      a.tac - (%do_over(values=inv_at_l drevadj ppe roa, between=%str(+), phrase=a.? * b.?)) else . end) as DA_Kothari
  from work.b_funda_wins as a left join work.Jackknifed_params as b
    on a.key=b.key
  group by a.key
  order by a.gvkey, a.fyear;

  /* Kothari performance matching: get DA_Jones (DA_mJones) accruals for the
     matched firm closest in ROA */
  create table work.da_roa as
  select a.*, b.roa
  from work.tmp as a left join work.b_funda_wins as b
    on a.key=b.key;

  create table work.da_all as
  select a.*,
    /* gvkey of matched firm */
    b.gvkey as gvkey_m,
    /* difference in ROA */
    abs(a.roa - b.roa) as Difference,
    /* difference in DA_Jones (DA_mJones) between the firm and its match */
    a.DA_Jones - b.DA_Jones as DA_pmKothari_Jones,
    a.DA_mJones - b.DA_mJones as DA_pmKothari_mJones
  from work.da_roa as a left join work.da_roa as b
    on a.fyear = b.fyear and a.sic2 = b.sic2 /* same 2-digit SIC industry-year */
    and a.key ne b.key /* not the same firm */
  group by a.gvkey, a.fyear
  having Difference = min(Difference) /* keep best match: smallest ROA difference */
  order by gvkey, fyear;
quit;
/* The matching step can return several candidate firms tied on the same
   minimum difference; NODUPKEY keeps a single row per firm-year key. */
proc sort data=work.da_all nodupkey;
  by key;
run;
/* The five discretionary accrual measures */
%let DAVars = DA_Jones DA_mJones DA_Kothari DA_pmKothari_Jones DA_pmKothari_mJones;
/* Optional: pass the discretionary accrual variables through %winsor, by
   fiscal year, with pctl=1 99. The output dataset name carries the value of
   the UseHribarCollinsTotalAccruals switch as a suffix. */
%winsor(
  dsetin=work.da_all,
  dsetout=work.accruals_HribarCollins_&UseHribarCollinsTotalAccruals.,
  byvar=fyear,
  vars=&DAVars,
  type=winsor,
  pctl=1 99
);
/* N, mean, minimum, median and maximum of the key variables */
proc means data=work.accruals_HribarCollins_&UseHribarCollinsTotalAccruals. n mean min median max;
  var &DAVars;
run;
/* Copy da_all from the remote session into the library "local" as ACCM_modified.
   NOTE(review): this downloads da_all, not the winsorized dataset written by
   the final %winsor call - confirm that the unwinsorized measures are wanted. */
proc download data=da_all out=local.ACCM_modified; run;
/* End of the block submitted to the remote session */
endrsubmit;
/* Close the connection to the remote session */
signoff;
Reference: