In [1]: import numpy as np
from scipy import stats
In [2]: import statsmodels.api as sm
In [3]: import matplotlib.pyplot as plt
In [4]: from statsmodels.sandbox.regression.predstd import wls_prediction_std
In [5]: nsample = 50
In [6]: x1 = np.linspace(0, 20, nsample)
In [7]: X = np.c_[x1, (x1-5)**2, np.ones(nsample)]
In [8]: sig = 0.3 # smaller error variance makes OLS<->RLM contrast bigger
In [9]: beta = [0.5, -0.0, 5.]
In [10]: y_true2 = np.dot(X, beta)
In [11]: y2 = y_true2 + sig*1. * np.random.normal(size=nsample)
In [12]: y2[[39,41,43,45,48]] -= 5 # add some outliers (10% of nsample)
Example: estimate quadratic function (true is linear)
In [13]: res = sm.OLS(y2, X).fit()
In [14]: print res.params
[ 0.52041769 -0.01343655 5.10209124]
Note: quadratic term captures outlier effect
In [15]: print res.bse
[ 0.07137208 0.00631533 0.46229484]
print res.predict
compare with robust estimator
In [16]: resrlm = sm.RLM(y2, X).fit()
In [17]: print resrlm.params
[ 0.50899669 -0.00311252 5.02369257]
In [18]: print resrlm.bse
[ 0.0194497 0.001721 0.12598057]
In [19]: plt.figure()
Out[19]: <matplotlib.figure.Figure at 0xf1840b0c>
In [20]: plt.plot(x1, y2, 'o', x1, y_true2, 'b-')
Out[20]:
[<matplotlib.lines.Line2D at 0xf1bd4dcc>,
<matplotlib.lines.Line2D at 0xf1bf9aac>]
In [21]: prstd, iv_l, iv_u = wls_prediction_std(res)
In [22]: plt.plot(x1, res.fittedvalues, 'r-')
Out[22]: [<matplotlib.lines.Line2D at 0xf1be65ac>]
In [23]: plt.plot(x1, iv_u, 'r--')
Out[23]: [<matplotlib.lines.Line2D at 0xf1840e4c>]
In [24]: plt.plot(x1, iv_l, 'r--')
Out[24]: [<matplotlib.lines.Line2D at 0xf1a31d6c>]
In [25]: plt.plot(x1, resrlm.fittedvalues, 'g.-')
Out[25]: [<matplotlib.lines.Line2D at 0xf17a036c>]
In [26]: plt.title('blue: true, red: OLS, green: RLM')
Out[26]: <matplotlib.text.Text at 0xf1bc19cc>
Example: estimate linear function (true is linear)
In [27]: X2 = X[:,[0,2]] # use only linear term and constant
In [28]: res2 = sm.OLS(y2, X2).fit()
In [29]: print res2.params
[ 0.38605223 5.64366631]
Note: quadratic term captures outlier effect
In [30]: print res2.bse
[ 0.03445102 0.3998306 ]
print res2.predict
In [31]: prstd, iv_l, iv_u = wls_prediction_std(res2)
compare with robust estimator
In [32]: resrlm2 = sm.RLM(y2, X2).fit()
In [33]: print resrlm2.params
[ 0.48344129 5.12227932]
In [34]: print resrlm2.bse
[ 0.0094077 0.10918363]
In [35]: plt.figure()
Out[35]: <matplotlib.figure.Figure at 0xf1a3124c>
In [36]: plt.plot(x1, y2, 'o', x1, y_true2, 'b-')
Out[36]:
[<matplotlib.lines.Line2D at 0xf1a219ec>,
<matplotlib.lines.Line2D at 0xf1a0840c>]
In [37]: plt.plot(x1, res2.fittedvalues, 'r-')
Out[37]: [<matplotlib.lines.Line2D at 0xf1a312cc>]
In [38]: plt.plot(x1, iv_u, 'r--')
Out[38]: [<matplotlib.lines.Line2D at 0xf1a0ca8c>]
In [39]: plt.plot(x1, iv_l, 'r--')
Out[39]: [<matplotlib.lines.Line2D at 0xf1a0c84c>]
In [40]: plt.plot(x1, resrlm2.fittedvalues, 'g.-')
Out[40]: [<matplotlib.lines.Line2D at 0xf1997b2c>]
In [41]: plt.title('blue: true, red: OLS, green: RLM')
Out[41]: <matplotlib.text.Text at 0xf19b086c>
see also help(sm.RLM.fit) for more options and module sm.robust.scale for scale options
plt.show()