I'm trying to implement an MM-estimator in python. I have a working implementation of an M-estimator statsmodels.RLM - which is implemented as an iteratively re-weighted least squares algorithm. I am struggling to implement the initial S-estimator, my understanding is that an S-estimator is an M-estimator of scale however none of the R packages (mass.rlm, robustbase.lmrob, robust.lmRob) I've found seem to implement this as an iteratively re-weighted least squares problem. Therefore, my question is what algorithm should I use to implement an S-estimator, and how should I implement this in python ideally only using numpy and scipy.
Here is my current code:
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
# Install robustbase
# from rpy2.robjects.packages import importr
# utils = importr("utils")
# utils.install_packages("robustbase")
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()
robustbase = importr("robustbase")
ctrl = robustbase.lmrob_control()
rng = np.random.default_rng()
nsample = 50
x1 = np.linspace(0, 20, nsample)
x1[-10:] += 5
X = np.column_stack((np.ones_like(x1), x1, (x1 - 5) ** 2))
# X = sm.add_constant(X)
sig = 0.5 # smaller error variance makes OLS<->RLM contrast bigger
beta = [5, 0.5, -0.0]
y_true2 = np.dot(X, beta)
y2 = y_true2 + sig * 1.0 * np.random.normal(size=nsample)
y2[(a:=np.arange(0, 50, 4))] -= rng.random(a.size)*50
y2[-10:] -= 20
x2r = ro.r.matrix(X, nrow=nsample, ncol=3)
y2r = ro.FloatVector(y2)
def mad(x, axis=None, centre=0):
return np.median(np.abs(x - centre), axis=axis) / 0.6745
def huber_t_weights(z, t=1.547):
absz = np.abs(z)
test = absz <= t
absz[test] = 1
return test + (1 - test) * t / absz
def m_estimator(x, y, n):
wls = sm.WLS(y, x, weights=np.ones_like(y)).fit()
params = wls.params
resid = y - x @ params
scale = mad(resid)
for _ in range(n):
weights = huber_t_weights(resid / scale)
wls = sm.WLS(y, x, weights=weights).fit()
params = wls.params
resid = y - x @ params
scale = mad(resid)
return params
m_me = m_estimator(X, y2, 50)
mm_robustbase = robustbase.lmrob_fit(x2r, y2r, control=ctrl)[0] # MM estimator
m_sm = sm.RLM(y2, X).fit().params # M estimator
o_sm = sm.OLS(y2, X).fit().params # OLS estimator
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax.plot(x1, y2, "o", label="data")
ax.plot(x1, y_true2, "b-", label="True")
ax.plot(x1, X@mm_robustbase, "g-", label="MM-estimator (robustbase))")
ax.plot(x1, X@m_sm, "k-", label="M-estimator (statsmodels))")
ax.plot(x1, X@m_me, "y-", label="M-estimator (me)")
ax.plot(x1, X@o_sm, "r-", label="OLS (statsmodels)")
ax.legend()
