Two-Stage Estimation of Switching Regression

The switching regression is an extension of the Heckit model, which is also known as the type-V Tobit model and assumes that there is a multivariate normal distribution for three latent variables Y1*, Y2*, and Y3* such that

A. Y1 = 1 for Y1* > 0 and Y1 = 0 for Y1* <= 0;

B. Y2 = Y2* for Y1 = 1 and Y2 = 0 for Y1 = 0;

C. Y3 = Y3* for Y1 = 0 and Y3 = 0 for Y1 = 1.

Therefore, Y2 and Y3 would not be observable at the same time.

In SAS, the switching regression can be implemented with the QLIM procedure, as shown in ( http://support.sas.com/documentation/cdl/en/etsug/63939/HTML/default/viewer.htm#etsug_qlim_sect039.htm
). However, when using the QLIM procedure in practice, I sometimes find that the MLE might not converge given the complexity of the likelihood function. In the example below, a two-stage estimation approach by using simple LOGISTIC and REG procedures is demonstrated. Benefits of the two-stage estimation are twofold. First of all, it is extremely easy to implement in practice. Secondly, when the MLE is preferred, estimated parameters from the two-stage approach can be used to provide initial values in the optimization to help the MLE convergence.

data d1;
  keep y1 y2 y3 x1 x2;
  do i = 1 to 500;
    x1 = rannor(1);
    x2 = rannor(1);
    u1 = rannor(1);
    u2 = rannor(1);
    u3 = rannor(1);
    y1l = 1 + 2 * x1 + 3 * x2 + u1;
    y2l = 1 + 2 * x1 + u1 * 0.2 + u2;
    y3l = 1 - 2 * x2 + u1 * 0.1 - u2 * 0.5 + u3 * 0.5;
    if y1l > 0 then y1 = 1;
    else y1 = 0;
    if y1l > 0 then y2 = y2l;
    else y2 = 0;
    if y1l <= 0 then y3 = y3l;
    else y3 = 0;
    output;
  end;
run;

*** 1-STEP MLE ***;
proc qlim data = d1;
  model y1 = x1 x2 / discrete;
  model y2 = x1 / select(y1 = 1);
  model y3 = x2 / select(y1 = 0);
run;

/*
Parameter      DF      Estimate        Standard     t Value   Approx
                                       Error                  P-Value
y2.Intercept   1       0.931225        0.080241     11.61     <.0001
y2.x1          1       1.970194        0.06801      28.97     <.0001
_Sigma.y2      1       1.050489        0.042064     24.97     <.0001
y3.Intercept   1       0.936837        0.09473       9.89     <.0001
y3.x2          1      -2.043977        0.071986    -28.39     <.0001
_Sigma.y3      1       0.710451        0.037412     18.99     <.0001
y1.Intercept   1       1.040852        0.127171      8.18     <.0001
y1.x1          1       1.900394        0.19335       9.83     <.0001
y1.x2          1       2.590489        0.257989     10.04     <.0001
_Rho.y1.y2     1       0.147923        0.2156        0.69     0.4927
_Rho.y1.y3     1       0.324967        0.166508      1.95     0.051
*/

*** 2-STAGE APPROACH ***;
proc logistic data = d1 desc;
  model y1 = x1 x2 / link = probit;
  output out = d2 xbeta = xb;
run;
/*
Parameter  DF  Estimate  Standard  Wald          P-Value
                         Error     Chi-Square
Intercept  1   1.0406    0.1296     64.5117      <.0001
x1         1   1.8982    0.1973     92.5614      <.0001
x2         1   2.6223    0.2603    101.47        <.0001
*/

data d3;
  set d2;
  if y1 = 1 then imr = pdf('normal', xb) / cdf('normal', xb);
  else imr = pdf('normal', xb) / (1 - cdf('normal', xb));
run;

proc reg data = d3 plots = none;
  where y1 = 1;
  model y2 = x1 imr;
run;
/*
Variable   DF  Parameter  Standard  t Value  P-Value
               Estimate   Error
Intercept  1   0.94043    0.0766    12.28    <.0001
x1         1   1.96494    0.06689   29.38    <.0001
imr        1   0.11476    0.20048    0.57    0.5674
*/

proc reg data = d3 plots = none;
  where y1 = 0;
  model y3 = x2 imr;
run;
/*
Variable   DF  Parameter  Standard  t Value  P-Value
               Estimate   Error
Intercept  1    0.92982   0.09493     9.79   <.0001
x2         1   -2.04808   0.07194   -28.47   <.0001
imr        1   -0.21852   0.1244     -1.76   0.0807
*/

*** SET INITIAL VALUES IN MLE BASED ON TWO-STAGE OUTCOMES ***;
proc qlim data = d1;
  init y1.intercept = 1.0406  y1.x1 = 1.8982      y1.x2 = 2.6223
       y2.intercept = 0.9404  y2.x1 = 1.9649  _sigma.y2 = 1.0539
       y3.intercept = 0.9298  y3.x2 = -2.048  _sigma.y3 = 0.7070;
  model y1 = x1 x2 / discrete;
  model y2 = x1 / select(y1 = 1);
  model y3 = x2 / select(y1 = 0);
run;