LMFIT vs. Scipy Why am I getting different results in minimize

296 views Asked by At

In short, I have data I'm trying to fit, and LMFIT and Scipy give 2 different solutions, with LMFIT being significantly worse.

Just some detail as to what is going on:

The "model" is a population weighted average. The "populations" are derived from 4 variables (the adjustable parameters). There are 2 independent "models" (n and h), but these are combined at the very end. The residuals is a combination of the residuals of both of these models against their respective data.

Assume the math is correct. The question is then why the different solutions when given identical functions, variables, bounds, and methods.

Given this MVE

import numpy as np
import scipy.optimize as so
from scipy.sparse.linalg import lsmr
from lmfit import minimize,Parameters
from lmfit.printfuncs import report_fit

data_1=[[117.417, 117.423, 117.438, 117.501], [124.16, 124.231, 124.089, 124.1], [115.632, 115.645, 115.828, 115.947], [118.314, 118.317, 118.287, 118.228], [108.407, 108.419, 108.396, 108.564]]
data_2=[[9.05, 9.044, 9.057, 9.079], [9.178, 9.167, 9.16, 9.176], [7.888, 7.893, 7.911, 7.895], [7.198, 7.202, 7.197, 7.213], [7.983, 7.976, 7.979, 8.02]]
def get_populations_lmfit(initial,io):
    k,k1,x,y=initial['kvar'],initial['k1var'],initial['xvar'],initial['yvar']
    kx,k1x=k*x,k1*x
    ky,k1y=k*y,k1*y
    kxy,k1xy=k*x*y,k1*x*y   
    partial_free_concentration_WT=(np.sqrt((k*k1)**2+(8*io*k*k1)+(8*io*k*k1**2))-(k*k1))/(4*(1+k1))
    partial_closed_concentration_WT=(((4*io)+(4*io*k1))/(4*(1+k1)**2))-(partial_free_concentration_WT/(1+k1))
    partial_open_concentration_WT=k1*partial_closed_concentration_WT
    partial_free_concentration_L273A=(np.sqrt((kx*k1x)**2+(8*io*kx*k1x)+(8*io*kx*k1x**2))-(kx*k1x))/(4*(1+k1x))
    partial_closed_concentration_L273A=(((4*io)+(4*io*k1x))/(4*(1+k1x)**2))-(partial_free_concentration_L273A/(1+k1x))
    partial_open_concentration_L273A=k1x*partial_closed_concentration_L273A
    partial_free_concentration_I272A=(np.sqrt((ky*k1y)**2+(8*io*ky*k1y)+(8*io*ky*k1y**2))-(ky*k1y))/(4*(1+k1y))
    partial_closed_concentration_I272A=(((4*io)+(4*io*k1y))/(4*(1+k1y)**2))-(partial_free_concentration_I272A/(1+k1y))
    partial_open_concentration_I272A=k1y*partial_closed_concentration_I272A
    partial_free_concentration_ILAA=(np.sqrt((kxy*k1xy)**2+(8*io*kxy*k1xy)+(8*io*kxy*k1xy**2))-(kxy*k1xy))/(4*(1+k1xy))
    partial_closed_concentration_ILAA=(((4*io)+(4*io*k1xy))/(4*(1+k1xy)**2))-(partial_free_concentration_ILAA/(1+k1xy))
    partial_open_concentration_ILAA=k1xy*partial_closed_concentration_ILAA
    local_chi2=0
    for experimental_shifts_n,experimental_shifts_h in zip(data_1,data_2):
        populations=np.array([[partial_free_concentration_WT,partial_open_concentration_WT,partial_closed_concentration_WT],[partial_free_concentration_L273A,partial_open_concentration_L273A,partial_closed_concentration_L273A],[partial_free_concentration_I272A,partial_open_concentration_I272A,partial_closed_concentration_I272A],[partial_free_concentration_ILAA,partial_open_concentration_ILAA,partial_closed_concentration_ILAA]])
        experimental_shifts_n=(np.array([experimental_shifts_n])/10)*800
        experimental_shifts_h=(np.array([experimental_shifts_h]))*800
        least_squared_fit_n=lsmr(populations/io,experimental_shifts_n,maxiter=10)
        least_squared_fit_h=lsmr(populations/io,experimental_shifts_h,maxiter=10)
        local_chi2+=((least_squared_fit_n[3])**2+least_squared_fit_h[3]**2)
    return local_chi2

def get_populations_scipy(initial,io):
    k,k1,x,y=initial[0],initial[1],initial[2],initial[3]
    kx,k1x=k*x,k1*x
    ky,k1y=k*y,k1*y
    kxy,k1xy=k*x*y,k1*x*y
    partial_free_concentration_WT=(np.sqrt((k*k1)**2+(8*io*k*k1)+(8*io*k*k1**2))-(k*k1))/(4*(1+k1))
    partial_closed_concentration_WT=(((4*io)+(4*io*k1))/(4*(1+k1)**2))-(partial_free_concentration_WT/(1+k1))
    partial_open_concentration_WT=k1*partial_closed_concentration_WT
    partial_free_concentration_L273A=(np.sqrt((kx*k1x)**2+(8*io*kx*k1x)+(8*io*kx*k1x**2))-(kx*k1x))/(4*(1+k1x))
    partial_closed_concentration_L273A=(((4*io)+(4*io*k1x))/(4*(1+k1x)**2))-(partial_free_concentration_L273A/(1+k1x))
    partial_open_concentration_L273A=k1x*partial_closed_concentration_L273A
    partial_free_concentration_I272A=(np.sqrt((ky*k1y)**2+(8*io*ky*k1y)+(8*io*ky*k1y**2))-(ky*k1y))/(4*(1+k1y))
    partial_closed_concentration_I272A=(((4*io)+(4*io*k1y))/(4*(1+k1y)**2))-(partial_free_concentration_I272A/(1+k1y))
    partial_open_concentration_I272A=k1y*partial_closed_concentration_I272A
    partial_free_concentration_ILAA=(np.sqrt((kxy*k1xy)**2+(8*io*kxy*k1xy)+(8*io*kxy*k1xy**2))-(kxy*k1xy))/(4*(1+k1xy))
    partial_closed_concentration_ILAA=(((4*io)+(4*io*k1xy))/(4*(1+k1xy)**2))-(partial_free_concentration_ILAA/(1+k1xy))
    partial_open_concentration_ILAA=k1xy*partial_closed_concentration_ILAA
    local_chi2=0
    for experimental_shifts_n,experimental_shifts_h in zip(data_1,data_2):
        populations=np.array([[partial_free_concentration_WT,partial_open_concentration_WT,partial_closed_concentration_WT],[partial_free_concentration_L273A,partial_open_concentration_L273A,partial_closed_concentration_L273A],[partial_free_concentration_I272A,partial_open_concentration_I272A,partial_closed_concentration_I272A],[partial_free_concentration_ILAA,partial_open_concentration_ILAA,partial_closed_concentration_ILAA]])
        experimental_shifts_n=(np.array([experimental_shifts_n])/10)*800
        experimental_shifts_h=(np.array([experimental_shifts_h]))*800
        least_squared_fit_n=lsmr(populations/io,experimental_shifts_n,maxiter=10)
        least_squared_fit_h=lsmr(populations/io,experimental_shifts_h,maxiter=10)
        local_chi2+=((least_squared_fit_n[3])**2+least_squared_fit_h[3]**2)
    return local_chi2

io=270000
params=Parameters()
params.add('kvar',value=500,min=0,max=np.inf)
params.add('k1var',value=0.02,min=0,max=np.inf)
params.add('xvar',value=7,min=0,max=np.inf)
params.add('yvar',value=30,min=0,max=np.inf)
lmfit_solution=minimize(get_populations_lmfit,params,args=(io,),method='nelder',max_nfev=1000)
scipy_solution=so.minimize(get_populations_scipy,args=(io,), x0=np.array([500,0.02,7,30]),bounds=((0,np.inf),)*4,method='Nelder-Mead',options={'maxiter':1000})
print(report_fit(lmfit_solution))
print(scipy_solution)

You will note the solutions from scipy vs. lmfit are completely different. Not only that, the chi2 for scipy is significantly better than lmfit. I don't quite understand why however. The setup is practically identical, the conditions and bounds and methods are identical, why do they give different results? I.E. Why is LMFITs solution so much worse?

Now I know you can give LMFIT an array of residuals instead of the sum like I have, and while this does improve the solutions a bit, it's still worse than Scipy and significantly worse chi2.

1

There are 1 answers

4
Reinderien On

I think you're asking the wrong questions. lmfit has no advantage here, so the easy decision is to scrap it and use scipy directly; but - your current code is cursed, so that should be fixed up. Vectorise, factor out common expressions, and reduce to a single inner lstsq call. The following shows that the old and new methods are numerically equivalent to ~13 significant digits, but the new method will be faster and is more simple to debug. Among many other reasons, your old code went through all of the trouble of doing many inner linear fits, but then always threw away the result.

import numpy as np
import scipy.optimize as so

data_1 = np.array([
    [117.417, 117.423, 117.438, 117.501],
    [124.160, 124.231, 124.089, 124.100],
    [115.632, 115.645, 115.828, 115.947],
    [118.314, 118.317, 118.287, 118.228],
    [108.407, 108.419, 108.396, 108.564],
])
data_2 = np.array([
    [9.050, 9.044, 9.057, 9.079],
    [9.178, 9.167, 9.160, 9.176],
    [7.888, 7.893, 7.911, 7.895],
    [7.198, 7.202, 7.197, 7.213],
    [7.983, 7.976, 7.979, 8.020],
])


def setup_for_chi(initial: np.ndarray, io: float) -> tuple[
    np.ndarray,
    np.ndarray,
    np.ndarray,
]:
    k, k1, x, y = initial
    xy1 = np.array((1, x, y, x*y))
    k4 = k*xy1
    k14 = k1*xy1

    partial_free_concentration = (
        np.sqrt(
            (k4*k14)**2
            + 8*io*k4*k14*(1 + k14)
        ) - k4*k14
    )/(4*(1 + k14))

    partial_closed_concentration = (
        4*io + 4*io*k14
    )/(
        4*(1 + k14)**2
    ) - partial_free_concentration/(1 + k14)

    partial_open_concentration = k14 * partial_closed_concentration

    populations = np.stack((
        partial_free_concentration,
        partial_open_concentration,
        partial_closed_concentration,
    ), axis=1)

    return (
        np.broadcast_to(populations/io, (len(data_1), *populations.shape)),
        data_1*80,
        data_2*800,
    )


def fit_populations(initial: np.ndarray, io: float) -> tuple[np.ndarray, float]:
    populations_io, experimental_shifts_n, experimental_shifts_h = setup_for_chi(initial, io)

    # 4x3
    A = populations_io[0, ...]

    # 4x10
    b = np.vstack((
        experimental_shifts_n,
        experimental_shifts_h,
    )).T

    x, chi2, rank, s = np.linalg.lstsq(a=A, b=b, rcond=None)
    return x, chi2.sum()


def get_populations(initial: np.ndarray, io: float) -> float:
    x, chi2 = fit_populations(initial, io)
    return chi2


def get_populations_old(initial,io, debug=False):
    from scipy.sparse.linalg import lsmr
    k,k1,x,y=initial[0],initial[1],initial[2],initial[3]
    kx,k1x=k*x,k1*x
    ky,k1y=k*y,k1*y
    kxy,k1xy=k*x*y,k1*x*y
    partial_free_concentration_WT=(np.sqrt((k*k1)**2+(8*io*k*k1)+(8*io*k*k1**2))-(k*k1))/(4*(1+k1))
    partial_closed_concentration_WT=(((4*io)+(4*io*k1))/(4*(1+k1)**2))-(partial_free_concentration_WT/(1+k1))
    partial_open_concentration_WT=k1*partial_closed_concentration_WT
    partial_free_concentration_L273A=(np.sqrt((kx*k1x)**2+(8*io*kx*k1x)+(8*io*kx*k1x**2))-(kx*k1x))/(4*(1+k1x))
    partial_closed_concentration_L273A=(((4*io)+(4*io*k1x))/(4*(1+k1x)**2))-(partial_free_concentration_L273A/(1+k1x))
    partial_open_concentration_L273A=k1x*partial_closed_concentration_L273A
    partial_free_concentration_I272A=(np.sqrt((ky*k1y)**2+(8*io*ky*k1y)+(8*io*ky*k1y**2))-(ky*k1y))/(4*(1+k1y))
    partial_closed_concentration_I272A=(((4*io)+(4*io*k1y))/(4*(1+k1y)**2))-(partial_free_concentration_I272A/(1+k1y))
    partial_open_concentration_I272A=k1y*partial_closed_concentration_I272A
    partial_free_concentration_ILAA=(np.sqrt((kxy*k1xy)**2+(8*io*kxy*k1xy)+(8*io*kxy*k1xy**2))-(kxy*k1xy))/(4*(1+k1xy))
    partial_closed_concentration_ILAA=(((4*io)+(4*io*k1xy))/(4*(1+k1xy)**2))-(partial_free_concentration_ILAA/(1+k1xy))
    partial_open_concentration_ILAA=k1xy*partial_closed_concentration_ILAA
    local_chi2=0
    for experimental_shifts_n,experimental_shifts_h in zip(data_1,data_2):
        populations=np.array([[partial_free_concentration_WT,partial_open_concentration_WT,partial_closed_concentration_WT],[partial_free_concentration_L273A,partial_open_concentration_L273A,partial_closed_concentration_L273A],[partial_free_concentration_I272A,partial_open_concentration_I272A,partial_closed_concentration_I272A],[partial_free_concentration_ILAA,partial_open_concentration_ILAA,partial_closed_concentration_ILAA]])
        experimental_shifts_n=(np.array([experimental_shifts_n])/10)*800
        experimental_shifts_h=(np.array([experimental_shifts_h]))*800
        least_squared_fit_n=lsmr(populations/io,experimental_shifts_n,maxiter=10)
        least_squared_fit_h=lsmr(populations/io,experimental_shifts_h,maxiter=10)
        if debug:
            print(least_squared_fit_n[0])
            print(least_squared_fit_h[0])
        local_chi2+=((least_squared_fit_n[3])**2+least_squared_fit_h[3]**2)
    return local_chi2


io = 270_000


def new_optimize() -> None:
    print('New:')
    solution = so.minimize(
        fun=get_populations,
        args=(io,),
        x0=np.array([500, 0.02, 7, 30]),
        bounds=((0, np.inf),)*4,
        options={'maxiter': 1_000},
    )
    assert solution.success, solution.message
    print('Initial:', solution.x)
    x, chi2 = fit_populations(solution.x, io)
    print('Residual:', chi2)


def regression_test() -> None:
    popio, exp_n, exp_h = setup_for_chi(initial=np.array([500, 0.02, 7, 30]), io=io)

    assert len(popio) == 5

    for pop in popio:
        assert np.allclose(
            pop,
            np.array([
                [0.00425185, 0.01952447, 0.97622368],
                [0.02781779, 0.11939080, 0.85279142],
                [0.09698655, 0.33863005, 0.56438341],
                [0.32547629, 0.54480761, 0.1297161]],
            ),
        )

    assert np.allclose(
        np.vstack(exp_n),
        np.array([
            [9393.36, 9393.84, 9395.04, 9400.08],
            [9932.8 , 9938.48, 9927.12, 9928.  ],
            [9250.56, 9251.6 , 9266.24, 9275.76],
            [9465.12, 9465.36, 9462.96, 9458.24],
            [8672.56, 8673.52, 8671.68, 8685.12],
        ]),
    )
    assert np.allclose(
        np.vstack(exp_h),
        np.array([
            [7240. , 7235.2, 7245.6, 7263.2],
            [7342.4, 7333.6, 7328. , 7340.8],
            [6310.4, 6314.4, 6328.8, 6316. ],
            [5758.4, 5761.6, 5757.6, 5770.4],
            [6386.4, 6380.8, 6383.2, 6416. ],
        ]),
    )


def old_optimize() -> None:
    scipy_solution = so.minimize(get_populations_old, args=(io,), x0=np.array([500, 0.02, 7, 30]),
                                 bounds=((0, np.inf),) * 4, method='Nelder-Mead',
                                 options={'maxiter': 1000})
    assert scipy_solution.success, scipy_solution.message
    print('OLD -')
    print('Initial:', scipy_solution.x)
    print('Residual:', scipy_solution.fun)
    print('Inner factor:')
    get_populations_old(scipy_solution.x, io, debug=True)
    x, chi2 = fit_populations(scipy_solution.x, io)
    print('chi2 from new, to compare:', chi2)
    print('Inner factor from new:')
    print(x)
    print()


if __name__ == '__main__':
    regression_test()
    old_optimize()
    new_optimize()
OLD -
Initial: [8.43488198e+02 3.73287986e-02 1.24710782e+00 5.87182669e+00]
Residual: 61.1805732870818
Inner factor:
[13574.53212153  8420.98162826  9397.58024441]
[22773.61024014  3634.75940246  7252.0978788 ]
[11112.84033704  9604.77003648  9938.98540985]
[23099.71790687  3549.89959355  7358.35542221]
[14598.2687031   8102.07994522  9252.20266122]
[-9605.44078682 10177.17124497  6290.34488533]
[ 5574.63331111 10364.43102419  9461.76279142]
[17134.2141557   3071.33941429  5772.55420423]
[20950.90064132  5776.67678267  8686.4137172 ]
[37642.15814366  -977.72464129  6417.34277753]
chi2 from new, to compare: 61.18057328708239
Inner factor from new:
[[13574.53212153 11112.84033703 14598.26870309  5574.63331116
  20950.90064131 22773.61024013 23099.71790684 -9605.44078684
  17134.21415572 37642.15814367]
 [ 8420.98162821  9604.77003642  8102.07994516 10364.43102393
   5776.6767826   3634.75940237  3549.89959343 10177.17124489
   3071.33941434  -977.72464127]
 [ 9397.58024442  9938.98540985  9252.20266123  9461.76279145
   8686.4137172   7252.09787881  7358.35542223  6290.34488534
   5772.55420422  6417.34277752]]

New:
Initial: [4.99999627e+02 1.02141914e-01 1.65423176e+00 3.11581574e+01]
Residual: 61.18057338613728